In [1]:
import torch
import random
import sentencepiece as spm
from transformers import ReformerConfig, ReformerModelWithLMHead, ReformerTokenizer
from torch.utils.data import DataLoader, Dataset

NUM_BATCHES = None
BATCH_SIZE = 6
GRADIENT_ACCUMULATE_EVERY = 3
LEARNING_RATE = 1e-4
VALIDATE_EVERY  = 20
GENERATE_EVERY  = 50
GENERATE_LENGTH = 512
SEQ_LEN = 24576

In [2]:
spm.SentencePieceTrainer.Train("--input=./data/tokenizer_training/AAresiduals.txt \
                                --vocab_size=28 \
                                --model_prefix=sequence_tokenizer \
                                --model_type=char \
                                --character_coverage=1.0")
tokenizer = ReformerTokenizer(vocab_file="sequence_tokenizer.model", do_lower_case=False, model_max_length=SEQ_LEN)

In [3]:
# configuration = ReformerConfig.from_pretrained("google/reformer-crime-and-punishment")
# configuration.axial_pos_shape=(128, 192)
# configuration.max_position_embeddings=SEQ_LEN
# configuration.vocab_size=tokenizer.vocab_size
# configuration.save_pretrained('model/config/')

In [4]:
configuration = ReformerConfig.from_pretrained('model/config/')
model = ReformerModelWithLMHead(configuration)

In [5]:
# input_ids = torch.tensor(tokenizer.encode("ALKLAKALK", 
#                                           add_special_tokens=True, 
#                                           max_length=SEQ_LEN, 
#                                           pad_to_max_length=True)).unsqueeze(0)  # Batch size 1

In [6]:
# model.cuda()
# outputs = model(input_ids.cuda(), labels=input_ids.cuda())
# loss, prediction_scores = outputs[:2]

In [7]:
def split_file(file,out1,out2,percentage=0.75,isShuffle=True,seed=42):
    """quora.com/How-can-split-a-text-file-randomly-in-75-and-25-and-create-two-output-file-in-python
    """
    random.seed(seed)
    with open(file, 'r',encoding="utf-8") as fin, open(out1, 'w') as foutBig, open(out2, 'w') as foutSmall:
        nLines = sum(1 for line in fin)
        fin.seek(0)

        nTrain = int(nLines*percentage) 
        nValid = nLines - nTrain

        i = 0
        for line in fin:
            r = random.random() if isShuffle else 0 # so that always evaluated to true when not isShuffle
            if (i < nTrain and r < percentage) or (nLines - i > nValid):
                foutBig.write(line)
                i += 1
            else:
                foutSmall.write(line)
                
split_file("data/yeast/yeast.txt", 
           "data/yeast/yeast_train.txt",
           "data/yeast/yeast_val.txt",
           percentage=0.9)

In [8]:
def cycle(loader):
    while True:
        for data in loader:
            yield data

In [9]:
class SequenceDataset(Dataset):
    def __init__(self, inputs, tokenizer, _len):
        super().__init__()
        self.inputs = inputs
        self.tokenizer = tokenizer
        self._len = _len

    @classmethod
    def prepare_from_file(cls, file_path, tokenizer):
        with open(file_path) as file:
            X = [l.strip() for l in file]
            X = [tokenizer.encode(sequence, 
                                  max_length=tokenizer.max_len, 
                                  add_special_tokens=True, 
                                  pad_to_max_length=True) for sequence in X]
            X = [torch.tensor(sequence) for sequence in X]
        inputs = torch.stack([X[i] for i in range(len(X))]).squeeze()
        return cls(inputs, tokenizer, len(inputs))

    def __getitem__(self, index):
        return self.inputs[index, ].cuda()

    def __len__(self):
        return self._len

In [10]:
train_dataset = SequenceDataset.prepare_from_file("data/yeast/yeast_train.txt", tokenizer)
val_dataset = SequenceDataset.prepare_from_file("data/yeast/yeast_val.txt", tokenizer)

In [11]:
train_loader = cycle(DataLoader(train_dataset, batch_size=BATCH_SIZE))
val_loader = cycle(DataLoader(val_dataset, batch_size=BATCH_SIZE))

In [12]:
next(train_loader), next(val_loader)

(tensor([[ 0, 14,  3,  ...,  0,  0,  0],
         [ 0, 14, 26,  ...,  0,  0,  0],
         [ 0, 14, 20,  ...,  0,  0,  0],
         [ 0, 14, 15,  ...,  0,  0,  0],
         [ 0, 14, 10,  ...,  0,  0,  0],
         [ 0, 14, 11,  ...,  0,  0,  0]], device='cuda:0'),
 tensor([[ 0, 14,  6,  ...,  0,  0,  0],
         [ 0, 14, 12,  ...,  0,  0,  0],
         [ 0, 14, 20,  ...,  0,  0,  0],
         [ 0, 14, 12,  ...,  0,  0,  0],
         [ 0, 14, 23,  ...,  0,  0,  0],
         [ 0, 14, 20,  ...,  0,  0,  0]], device='cuda:0'))

In [13]:
len(train_dataset), len(val_dataset)

(5444, 605)

In [14]:
from transformers import AdamW
from torch.nn import CrossEntropyLoss

optimizer = AdamW(params=model.parameters(), lr=LEARNING_RATE)

NUM_BATCHES = len(train_dataset)//BATCH_SIZE

In [15]:
model.cuda()

ReformerModelWithLMHead(
  (reformer): ReformerModel(
    (embeddings): ReformerEmbeddings(
      (word_embeddings): Embedding(28, 256)
      (position_embeddings): AxialPositionEmbeddings(
        (weights): ParameterList(
            (0): Parameter containing: [torch.cuda.FloatTensor of size 128x1x64 (GPU 0)]
            (1): Parameter containing: [torch.cuda.FloatTensor of size 1x192x192 (GPU 0)]
        )
      )
    )
    (encoder): ReformerEncoder(
      (layers): ModuleList(
        (0): ReformerLayer(
          (attention): ReformerAttention(
            (layer_norm): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
            (self_attention): LocalSelfAttention(
              (query): Linear(in_features=256, out_features=128, bias=False)
              (key): Linear(in_features=256, out_features=128, bias=False)
              (value): Linear(in_features=256, out_features=128, bias=False)
            )
            (output): ReformerSelfOutput(
              (dense): Linea

In [16]:
import tqdm
for i in tqdm.tqdm(range(NUM_BATCHES), mininterval=10., desc='training'):
    
    model.train()
    
    for __ in range(GRADIENT_ACCUMULATE_EVERY):
        inputs = next(train_loader)
        outputs = model(inputs, labels=inputs)
        loss, prediction_scores = outputs[:2]
        loss.backward()

    print(f'training loss: {loss.item()}')
    
    torch.nn.utils.clip_grad_norm_(model.parameters(), 0.5)
    
    optimizer.step()
    optimizer.zero_grad()

    if i % VALIDATE_EVERY == 0:
        model.eval()
        with torch.no_grad():
            inputs = next(val_loader)
            outputs = model(inputs, labels=inputs)
            loss, prediction_scores = outputs[:2]
            print(f'validation loss: {loss.item()}')

training:   0%|                                                                                                                                  | 0/907 [00:00<?, ?it/s]

training loss: 3.415182113647461
validation loss: 3.25498628616333
training loss: 3.1905267238616943


training:   0%|▍                                                                                                                         | 3/907 [00:10<53:09,  3.53s/it]

training loss: 2.984243869781494
training loss: 2.7950422763824463
training loss: 2.6422574520111084


training:   1%|▊                                                                                                                         | 6/907 [00:20<52:13,  3.48s/it]

training loss: 2.5364983081817627
training loss: 2.3368968963623047
training loss: 2.1750779151916504


training:   1%|█▏                                                                                                                        | 9/907 [00:30<51:31,  3.44s/it]

training loss: 2.0151383876800537
training loss: 1.903231143951416
training loss: 1.7333574295043945


training:   1%|█▌                                                                                                                       | 12/907 [00:40<51:03,  3.42s/it]

training loss: 1.5768060684204102
training loss: 1.4650559425354004
training loss: 1.3049408197402954


training:   2%|██                                                                                                                       | 15/907 [00:51<50:42,  3.41s/it]

training loss: 1.1829488277435303
training loss: 1.0360374450683594
training loss: 1.0044329166412354


training:   2%|██▍                                                                                                                      | 18/907 [01:01<50:22,  3.40s/it]

training loss: 0.8263130187988281
training loss: 0.625495195388794
training loss: 0.6728411912918091
training loss: 0.4373031556606293


training:   2%|██▊                                                                                                                      | 21/907 [01:11<50:29,  3.42s/it]

validation loss: 0.40881702303886414
training loss: 0.37705838680267334
training loss: 0.3208352327346802


training:   3%|███▏                                                                                                                     | 24/907 [01:21<50:06,  3.40s/it]

training loss: 0.38837361335754395
training loss: 0.2038479447364807
training loss: 0.17214249074459076


training:   3%|███▌                                                                                                                     | 27/907 [01:31<49:46,  3.39s/it]

training loss: 0.2065591812133789
training loss: 0.09774124622344971
training loss: 0.11031652241945267


training:   3%|████                                                                                                                     | 30/907 [01:41<49:28,  3.39s/it]

training loss: 0.12114151567220688
training loss: 0.24084527790546417
training loss: 0.2678217887878418


training:   4%|████▍                                                                                                                    | 33/907 [01:51<49:15,  3.38s/it]

training loss: 0.272189736366272
training loss: 0.2389449030160904
training loss: 0.10218578577041626


training:   4%|████▊                                                                                                                    | 36/907 [02:02<49:08,  3.39s/it]

training loss: 0.19721095263957977
training loss: 0.22316300868988037
training loss: 0.09984390437602997


training:   4%|█████▏                                                                                                                   | 39/907 [02:12<48:57,  3.38s/it]

training loss: 0.19583845138549805
training loss: 0.14737698435783386
training loss: 0.14421579241752625
validation loss: 0.20215922594070435


training:   5%|█████▌                                                                                                                   | 42/907 [02:22<49:08,  3.41s/it]

training loss: 0.14634400606155396
training loss: 0.32921627163887024
training loss: 0.211281880736351


training:   5%|██████                                                                                                                   | 45/907 [02:32<48:48,  3.40s/it]

training loss: 0.10612472146749496
training loss: 0.2071407586336136
training loss: 0.16467268764972687


training:   5%|██████▍                                                                                                                  | 48/907 [02:42<48:30,  3.39s/it]

training loss: 0.2078443169593811
training loss: 0.1727721095085144
training loss: 0.1282343715429306


training:   6%|██████▊                                                                                                                  | 51/907 [02:53<48:14,  3.38s/it]

training loss: 0.11701738089323044
training loss: 0.10800930857658386
training loss: 0.1391645073890686


training:   6%|███████▏                                                                                                                 | 54/907 [03:03<48:00,  3.38s/it]

training loss: 0.19350308179855347
training loss: 0.11506608128547668
training loss: 0.12521225214004517


training:   6%|███████▌                                                                                                                 | 57/907 [03:13<47:49,  3.38s/it]

training loss: 0.09514007717370987
training loss: 0.10598386079072952
training loss: 0.20461292564868927


training:   7%|████████                                                                                                                 | 60/907 [03:23<47:41,  3.38s/it]

training loss: 0.11109176278114319
training loss: 0.08375932276248932
validation loss: 0.08753838390111923
training loss: 0.08387809991836548


training:   7%|████████▍                                                                                                                | 63/907 [03:33<47:57,  3.41s/it]

training loss: 0.15372444689273834
training loss: 0.06937780976295471
training loss: 0.10831975191831589


training:   7%|████████▊                                                                                                                | 66/907 [03:43<47:37,  3.40s/it]

training loss: 0.09385103732347488
training loss: 0.10155728459358215
training loss: 0.06715381145477295


training:   8%|█████████▏                                                                                                               | 69/907 [03:54<47:22,  3.39s/it]

training loss: 0.1440684199333191
training loss: 0.1433064341545105
training loss: 0.13345873355865479


training:   8%|█████████▌                                                                                                               | 72/907 [04:04<47:18,  3.40s/it]

training loss: 0.12094171345233917
training loss: 0.18851135671138763
training loss: 0.07278772443532944


training:   8%|██████████                                                                                                               | 75/907 [04:14<47:03,  3.39s/it]

training loss: 0.13861168920993805
training loss: 0.06440434604883194
training loss: 0.10166709125041962


training:   9%|██████████▍                                                                                                              | 78/907 [04:24<46:46,  3.39s/it]

training loss: 0.1053539365530014
training loss: 0.11536230146884918
training loss: 0.08425778895616531
training loss: 0.05772557854652405


training:   9%|██████████▊                                                                                                              | 81/907 [04:34<46:57,  3.41s/it]

validation loss: 0.12106866389513016
training loss: 0.16715319454669952
training loss: 0.08723653107881546


training:   9%|███████████▏                                                                                                             | 84/907 [04:45<46:40,  3.40s/it]

training loss: 0.06477591395378113
training loss: 0.08237913995981216
training loss: 0.154417484998703


training:  10%|███████████▌                                                                                                             | 87/907 [04:55<46:23,  3.40s/it]

training loss: 0.09164154529571533
training loss: 0.09844108670949936
training loss: 0.0830579325556755


training:  10%|████████████                                                                                                             | 90/907 [05:05<46:08,  3.39s/it]

training loss: 0.09404442459344864
training loss: 0.07912231981754303
training loss: 0.0864378958940506


training:  10%|████████████▍                                                                                                            | 93/907 [05:15<45:53,  3.38s/it]

training loss: 0.07330195605754852
training loss: 0.14693902432918549
training loss: 0.1546115279197693


training:  11%|████████████▊                                                                                                            | 96/907 [05:25<45:39,  3.38s/it]

training loss: 0.13581416010856628
training loss: 0.05693134292960167
training loss: 0.08377066999673843


training:  11%|█████████████▏                                                                                                           | 99/907 [05:35<45:26,  3.37s/it]

training loss: 0.06403832882642746
training loss: 0.06057371199131012
training loss: 0.056324250996112823
validation loss: 0.11256010830402374


training:  11%|█████████████▍                                                                                                          | 102/907 [05:46<45:37,  3.40s/it]

training loss: 0.05284120514988899
training loss: 0.07848445326089859
training loss: 0.05380148068070412


training:  12%|█████████████▉                                                                                                          | 105/907 [05:56<45:24,  3.40s/it]

training loss: 0.04793526977300644
training loss: 0.10387643426656723
training loss: 0.0628696009516716


training:  12%|██████████████▎                                                                                                         | 108/907 [06:06<45:11,  3.39s/it]

training loss: 0.0604732409119606
training loss: 0.05847280099987984
training loss: 0.07922787964344025


training:  12%|██████████████▋                                                                                                         | 111/907 [06:16<44:56,  3.39s/it]

training loss: 0.07228385657072067
training loss: 0.08338803052902222
training loss: 0.059296850115060806


training:  13%|███████████████                                                                                                         | 114/907 [06:26<44:42,  3.38s/it]

training loss: 0.07207285612821579
training loss: 0.06271497160196304
training loss: 0.052956078201532364


training:  13%|███████████████▍                                                                                                        | 117/907 [06:36<44:29,  3.38s/it]

training loss: 0.06386356800794601
training loss: 0.055031441152095795
training loss: 0.034567542374134064


training:  13%|███████████████▉                                                                                                        | 120/907 [06:46<44:16,  3.38s/it]

training loss: 0.06306621432304382
training loss: 0.046875908970832825
validation loss: 0.05783437564969063
training loss: 0.07380910217761993


training:  14%|████████████████▎                                                                                                       | 123/907 [06:57<44:26,  3.40s/it]

training loss: 0.055837053805589676
training loss: 0.0922764241695404
training loss: 0.06727953255176544


training:  14%|████████████████▋                                                                                                       | 126/907 [07:07<44:07,  3.39s/it]

training loss: 0.05167859420180321
training loss: 0.052942097187042236
training loss: 0.05080070346593857


training:  14%|█████████████████                                                                                                       | 129/907 [07:17<43:55,  3.39s/it]

training loss: 0.050297897309064865
training loss: 0.17143501341342926
training loss: 0.06985819339752197


training:  15%|█████████████████▍                                                                                                      | 132/907 [07:27<43:44,  3.39s/it]

training loss: 0.06975625455379486
training loss: 0.04951460286974907
training loss: 0.05490691587328911


training:  15%|█████████████████▊                                                                                                      | 135/907 [07:37<43:31,  3.38s/it]

training loss: 0.06657347828149796
training loss: 0.04319703206419945
training loss: 0.07550621032714844


training:  15%|██████████████████▎                                                                                                     | 138/907 [07:47<43:18,  3.38s/it]

training loss: 0.06988628953695297
training loss: 0.078358493745327
training loss: 0.1119714081287384
training loss: 0.03031752072274685


training:  16%|██████████████████▋                                                                                                     | 141/907 [07:58<43:29,  3.41s/it]

validation loss: 0.07002123445272446
training loss: 0.0683242678642273
training loss: 0.06856662780046463


training:  16%|███████████████████                                                                                                     | 144/907 [08:08<43:10,  3.40s/it]

training loss: 0.07060953974723816
training loss: 0.11291681230068207
training loss: 0.040532127022743225


training:  16%|███████████████████▍                                                                                                    | 147/907 [08:18<42:53,  3.39s/it]

training loss: 0.049263566732406616
training loss: 0.06680973619222641
training loss: 0.05237019434571266


training:  17%|███████████████████▊                                                                                                    | 150/907 [08:28<42:40,  3.38s/it]

training loss: 0.049179404973983765
training loss: 0.043884266167879105
training loss: 0.046656932681798935


training:  17%|████████████████████▏                                                                                                   | 153/907 [08:38<42:30,  3.38s/it]

training loss: 0.04414438456296921
training loss: 0.037927862256765366
training loss: 0.06539441645145416


training:  17%|████████████████████▋                                                                                                   | 156/907 [08:48<42:19,  3.38s/it]

training loss: 0.05347999557852745
training loss: 0.07087817788124084
training loss: 0.07373170554637909


training:  18%|█████████████████████                                                                                                   | 159/907 [08:59<42:08,  3.38s/it]

training loss: 0.11611182987689972
training loss: 0.049404487013816833
training loss: 0.045435789972543716
validation loss: 0.08426174521446228


training:  18%|█████████████████████▍                                                                                                  | 162/907 [09:09<42:16,  3.40s/it]

training loss: 0.061986710876226425
training loss: 0.0769907757639885
training loss: 0.05980948358774185


training:  18%|█████████████████████▊                                                                                                  | 165/907 [09:19<41:57,  3.39s/it]

training loss: 0.03623552992939949
training loss: 0.06018418073654175
training loss: 0.10486136376857758


training:  19%|██████████████████████▏                                                                                                 | 168/907 [09:29<41:41,  3.39s/it]

training loss: 0.07816479355096817
training loss: 0.061005447059869766
training loss: 0.049415502697229385


training:  19%|██████████████████████▌                                                                                                 | 171/907 [09:39<41:27,  3.38s/it]

training loss: 0.07894298434257507
training loss: 0.04025472328066826
training loss: 0.041178423911333084


training:  19%|███████████████████████                                                                                                 | 174/907 [09:49<41:14,  3.38s/it]

training loss: 0.06286509335041046
training loss: 0.03124907985329628
training loss: 0.060483168810606


training:  20%|███████████████████████▍                                                                                                | 177/907 [09:59<41:07,  3.38s/it]

training loss: 0.053165413439273834
training loss: 0.06107719987630844
training loss: 0.10312402248382568


training:  20%|███████████████████████▊                                                                                                | 180/907 [10:10<40:57,  3.38s/it]

training loss: 0.06821539998054504
training loss: 0.04750929772853851
validation loss: 0.05989896506071091
training loss: 0.039688460528850555


training:  20%|████████████████████████▏                                                                                               | 183/907 [10:20<41:06,  3.41s/it]

training loss: 0.07723154872655869
training loss: 0.09008575230836868
training loss: 0.05819205939769745


training:  21%|████████████████████████▌                                                                                               | 186/907 [10:30<40:48,  3.40s/it]

training loss: 0.10735098272562027
training loss: 0.036239784210920334
training loss: 0.04300837218761444


training:  21%|█████████████████████████                                                                                               | 189/907 [10:40<40:31,  3.39s/it]

training loss: 0.06399450451135635
training loss: 0.05613964796066284
training loss: 0.0667990893125534


training:  21%|█████████████████████████▍                                                                                              | 192/907 [10:50<40:17,  3.38s/it]

training loss: 0.09031172841787338
training loss: 0.055195342749357224
training loss: 0.05507213622331619


training:  21%|█████████████████████████▊                                                                                              | 195/907 [11:00<40:04,  3.38s/it]

training loss: 0.09794124215841293
training loss: 0.056826796382665634
training loss: 0.05758810043334961


training:  22%|██████████████████████████▏                                                                                             | 198/907 [11:11<39:54,  3.38s/it]

training loss: 0.04939531162381172
training loss: 0.07418223470449448
training loss: 0.05500141903758049
training loss: 0.051817506551742554


training:  22%|██████████████████████████▌                                                                                             | 201/907 [11:21<40:04,  3.41s/it]

validation loss: 0.06661319732666016
training loss: 0.04661671444773674
training loss: 0.05700898915529251


training:  22%|██████████████████████████▉                                                                                             | 204/907 [11:31<39:48,  3.40s/it]

training loss: 0.09186792373657227
training loss: 0.05421578884124756
training loss: 0.053051989525556564


training:  23%|███████████████████████████▍                                                                                            | 207/907 [11:41<39:33,  3.39s/it]

training loss: 0.04288758710026741
training loss: 0.034847505390644073
training loss: 0.03437875211238861


training:  23%|███████████████████████████▊                                                                                            | 210/907 [11:51<39:19,  3.39s/it]

training loss: 0.07315213233232498
training loss: 0.06064307317137718
training loss: 0.05717159062623978


training:  23%|████████████████████████████▏                                                                                           | 213/907 [12:01<39:07,  3.38s/it]

training loss: 0.051677439361810684
training loss: 0.04521990939974785
training loss: 0.06839998811483383


training:  24%|████████████████████████████▌                                                                                           | 216/907 [12:12<38:53,  3.38s/it]

training loss: 0.05677996575832367
training loss: 0.04358687251806259
training loss: 0.0668056383728981


training:  24%|████████████████████████████▉                                                                                           | 219/907 [12:22<38:41,  3.37s/it]

training loss: 0.037570737302303314
training loss: 0.04610850289463997
training loss: 0.05126726254820824
validation loss: 0.06189752370119095


training:  24%|█████████████████████████████▎                                                                                          | 222/907 [12:32<38:56,  3.41s/it]

training loss: 0.06054844334721565
training loss: 0.05635199323296547
training loss: 0.0668582171201706


training:  25%|█████████████████████████████▊                                                                                          | 225/907 [12:43<38:57,  3.43s/it]

training loss: 0.062469806522130966
training loss: 0.06919053941965103
training loss: 0.06920777261257172


training:  25%|██████████████████████████████▏                                                                                         | 228/907 [12:53<38:38,  3.41s/it]

training loss: 0.05220109596848488
training loss: 0.04535859450697899
training loss: 0.029627319425344467


training:  25%|██████████████████████████████▌                                                                                         | 231/907 [13:03<38:18,  3.40s/it]

training loss: 0.062029097229242325
training loss: 0.05159652978181839
training loss: 0.04094919562339783


training:  26%|██████████████████████████████▉                                                                                         | 234/907 [13:13<38:04,  3.39s/it]

training loss: 0.07348514348268509
training loss: 0.035969749093055725
training loss: 0.037718482315540314


training:  26%|███████████████████████████████▎                                                                                        | 237/907 [13:23<37:48,  3.39s/it]

training loss: 0.041744399815797806
training loss: 0.07373981922864914
training loss: 0.08469191193580627


training:  26%|███████████████████████████████▊                                                                                        | 240/907 [13:33<37:34,  3.38s/it]

training loss: 0.0790758803486824
training loss: 0.0393933542072773
validation loss: 0.05624441057443619
training loss: 0.13251428306102753


training:  27%|████████████████████████████████▏                                                                                       | 243/907 [13:44<37:40,  3.40s/it]

training loss: 0.047375645488500595
training loss: 0.09741155058145523
training loss: 0.08664784580469131


training:  27%|████████████████████████████████▌                                                                                       | 246/907 [13:54<37:26,  3.40s/it]

training loss: 0.05957052484154701
training loss: 0.0700085461139679
training loss: 0.045527033507823944


training:  27%|████████████████████████████████▉                                                                                       | 249/907 [14:04<37:13,  3.39s/it]

training loss: 0.06794662028551102
training loss: 0.048601631075143814
training loss: 0.0569806843996048


training:  28%|█████████████████████████████████▎                                                                                      | 252/907 [14:14<36:59,  3.39s/it]

training loss: 0.05715392902493477
training loss: 0.06730632483959198
training loss: 0.05492285639047623


training:  28%|█████████████████████████████████▋                                                                                      | 255/907 [14:24<36:46,  3.38s/it]

training loss: 0.06144111230969429
training loss: 0.04738030955195427
training loss: 0.07571575045585632


training:  28%|██████████████████████████████████▏                                                                                     | 258/907 [14:34<36:33,  3.38s/it]

training loss: 0.05477036163210869
training loss: 0.05910317972302437
training loss: 0.06114526465535164
training loss: 0.09208372235298157


training:  29%|██████████████████████████████████▌                                                                                     | 261/907 [14:45<36:38,  3.40s/it]

validation loss: 0.05028299614787102
training loss: 0.07960840314626694
training loss: 0.05894583463668823


training:  29%|██████████████████████████████████▉                                                                                     | 264/907 [14:55<36:21,  3.39s/it]

training loss: 0.032562386244535446
training loss: 0.033678047358989716
training loss: 0.05356459692120552


training:  29%|███████████████████████████████████▎                                                                                    | 267/907 [15:05<36:06,  3.39s/it]

training loss: 0.03795328736305237
training loss: 0.053233496844768524
training loss: 0.06744008511304855


training:  30%|███████████████████████████████████▋                                                                                    | 270/907 [15:15<35:55,  3.38s/it]

training loss: 0.04310755804181099
training loss: 0.06424322724342346
training loss: 0.08296642452478409


training:  30%|████████████████████████████████████                                                                                    | 273/907 [15:25<35:45,  3.38s/it]

training loss: 0.0445563979446888
training loss: 0.04486432299017906
training loss: 0.0815172865986824


training:  30%|████████████████████████████████████▌                                                                                   | 276/907 [15:35<35:31,  3.38s/it]

training loss: 0.09306637197732925
training loss: 0.0755491703748703
training loss: 0.04350823909044266


training:  31%|████████████████████████████████████▉                                                                                   | 279/907 [15:45<35:20,  3.38s/it]

training loss: 0.06597297638654709
training loss: 0.04093882814049721
training loss: 0.09948456287384033
validation loss: 0.14278621971607208


training:  31%|█████████████████████████████████████▎                                                                                  | 282/907 [15:56<35:27,  3.40s/it]

training loss: 0.10785975307226181
training loss: 0.03822832927107811
training loss: 0.052604127675294876


training:  31%|█████████████████████████████████████▋                                                                                  | 285/907 [16:06<35:10,  3.39s/it]

training loss: 0.04238015040755272
training loss: 0.06814759969711304
training loss: 0.06436030566692352


training:  32%|██████████████████████████████████████                                                                                  | 288/907 [16:16<34:55,  3.39s/it]

training loss: 0.05082175135612488
training loss: 0.039180729538202286
training loss: 0.050003133714199066


training:  32%|██████████████████████████████████████▌                                                                                 | 291/907 [16:26<34:42,  3.38s/it]

training loss: 0.025289710611104965
training loss: 0.06319819390773773
training loss: 0.07399607449769974


training:  32%|██████████████████████████████████████▉                                                                                 | 294/907 [16:36<34:33,  3.38s/it]

training loss: 0.0551941804587841
training loss: 0.054202787578105927
training loss: 0.03949686512351036


training:  33%|███████████████████████████████████████▎                                                                                | 297/907 [16:46<34:22,  3.38s/it]

training loss: 0.0574575811624527
training loss: 0.04611125588417053
training loss: 0.05074339732527733


training:  33%|███████████████████████████████████████▋                                                                                | 300/907 [16:56<34:10,  3.38s/it]

training loss: 0.0647677555680275
training loss: 0.04328961670398712
validation loss: 0.15739652514457703
training loss: 0.11160939186811447
training loss: 0.05311135575175285


training:  34%|████████████████████████████████████████▏                                                                               | 304/907 [17:09<33:34,  3.34s/it]

training loss: 0.023337967693805695
training loss: 0.021433988586068153
training loss: 0.030034959316253662


training:  34%|████████████████████████████████████████▏                                                                               | 304/907 [17:20<33:34,  3.34s/it]

training loss: 0.04401553422212601


training:  34%|████████████████████████████████████████▋                                                                               | 308/907 [17:23<33:26,  3.35s/it]

training loss: 0.026589492335915565
training loss: 0.07198785245418549
training loss: 0.062016598880290985


training:  34%|█████████████████████████████████████████▏                                                                              | 311/907 [17:33<33:19,  3.36s/it]

training loss: 0.07946467399597168
training loss: 0.03569456934928894
training loss: 0.08179745823144913


training:  35%|█████████████████████████████████████████▌                                                                              | 314/907 [17:43<33:10,  3.36s/it]

training loss: 0.07284501940011978
training loss: 0.06722363084554672
training loss: 0.048060379922389984


training:  35%|█████████████████████████████████████████▉                                                                              | 317/907 [17:53<33:06,  3.37s/it]

training loss: 0.0706314817070961
training loss: 0.08757621794939041
training loss: 0.0698481947183609


training:  35%|██████████████████████████████████████████▎                                                                             | 320/907 [18:03<33:00,  3.37s/it]

training loss: 0.059167422354221344
training loss: 0.04796430841088295
validation loss: 0.06551963090896606
training loss: 0.08330096304416656


training:  36%|██████████████████████████████████████████▋                                                                             | 323/907 [18:14<33:08,  3.40s/it]

training loss: 0.009696763008832932
training loss: 0.017162922769784927
training loss: 0.015453805215656757


training:  36%|███████████████████████████████████████████▏                                                                            | 326/907 [18:24<32:53,  3.40s/it]

training loss: 0.0393233485519886
training loss: 0.12317060679197311
training loss: 0.025868598371744156


training:  36%|███████████████████████████████████████████▌                                                                            | 329/907 [18:34<32:40,  3.39s/it]

training loss: 0.04207850992679596
training loss: 0.050760481506586075
training loss: 0.021989811211824417


training:  37%|███████████████████████████████████████████▉                                                                            | 332/907 [18:44<32:27,  3.39s/it]

training loss: 0.03792652115225792
training loss: 0.06048756465315819
training loss: 0.04452051222324371


training:  37%|████████████████████████████████████████████▎                                                                           | 335/907 [18:54<32:15,  3.38s/it]

training loss: 0.10018511861562729
training loss: 0.0820884108543396
training loss: 0.07205996662378311


training:  37%|████████████████████████████████████████████▋                                                                           | 338/907 [19:05<32:04,  3.38s/it]

training loss: 0.10229267925024033
training loss: 0.07807324826717377
training loss: 0.06960782408714294
training loss: 0.07917743921279907


training:  38%|█████████████████████████████████████████████                                                                           | 341/907 [19:15<32:11,  3.41s/it]

validation loss: 0.038809068500995636
training loss: 0.06261855363845825
training loss: 0.06509066373109818


training:  38%|█████████████████████████████████████████████▌                                                                          | 344/907 [19:25<31:57,  3.41s/it]

training loss: 0.06370184570550919
training loss: 0.05265944078564644
training loss: 0.09115568548440933


training:  38%|█████████████████████████████████████████████▉                                                                          | 347/907 [19:35<31:42,  3.40s/it]

training loss: 0.04812845587730408
training loss: 0.06928105652332306
training loss: 0.06428056955337524


training:  39%|██████████████████████████████████████████████▎                                                                         | 350/907 [19:45<31:29,  3.39s/it]

training loss: 0.07504431903362274
training loss: 0.10112492740154266
training loss: 0.0678730309009552


training:  39%|██████████████████████████████████████████████▋                                                                         | 353/907 [19:56<31:18,  3.39s/it]

training loss: 0.09031189233064651
training loss: 0.07509683817625046
training loss: 0.06125924736261368


training:  39%|███████████████████████████████████████████████                                                                         | 356/907 [20:06<31:07,  3.39s/it]

training loss: 0.04474031552672386
training loss: 0.07166841626167297
training loss: 0.04130186885595322


training:  40%|███████████████████████████████████████████████▍                                                                        | 359/907 [20:16<30:54,  3.38s/it]

training loss: 0.05697529390454292
training loss: 0.11526989936828613
training loss: 0.05776992067694664
validation loss: 0.07238073647022247


training:  40%|███████████████████████████████████████████████▉                                                                        | 362/907 [20:26<30:59,  3.41s/it]

training loss: 0.053485434502363205
training loss: 0.06126202642917633
training loss: 0.04760752618312836


training:  40%|████████████████████████████████████████████████▎                                                                       | 365/907 [20:36<30:45,  3.41s/it]

training loss: 0.05621994659304619
training loss: 0.08356263488531113
training loss: 0.06078094616532326


training:  41%|████████████████████████████████████████████████▋                                                                       | 368/907 [20:47<30:32,  3.40s/it]

training loss: 0.06701969355344772
training loss: 0.043388817459344864
training loss: 0.0512838177382946


training:  41%|█████████████████████████████████████████████████                                                                       | 371/907 [20:57<30:19,  3.40s/it]

training loss: 0.05081693455576897
training loss: 0.05039156228303909
training loss: 0.06634700298309326


training:  41%|█████████████████████████████████████████████████▍                                                                      | 374/907 [21:07<30:06,  3.39s/it]

training loss: 0.07159839570522308
training loss: 0.08187542110681534
training loss: 0.0538761243224144


training:  42%|█████████████████████████████████████████████████▉                                                                      | 377/907 [21:17<29:53,  3.38s/it]

training loss: 0.07913748919963837
training loss: 0.04347430542111397
training loss: 0.05548056587576866


training:  42%|██████████████████████████████████████████████████▎                                                                     | 380/907 [21:27<29:42,  3.38s/it]

training loss: 0.0594477653503418
training loss: 0.09135899692773819
validation loss: 0.06247873976826668
training loss: 0.07970000058412552


training:  42%|██████████████████████████████████████████████████▋                                                                     | 383/907 [21:38<29:45,  3.41s/it]

training loss: 0.10598573833703995
training loss: 0.05721462145447731
training loss: 0.13328932225704193


training:  43%|███████████████████████████████████████████████████                                                                     | 386/907 [21:48<29:31,  3.40s/it]

training loss: 0.07245739549398422
training loss: 0.05714219808578491
training loss: 0.06933928281068802


training:  43%|███████████████████████████████████████████████████▍                                                                    | 389/907 [21:58<29:21,  3.40s/it]

training loss: 0.07444256544113159
training loss: 0.05789998173713684
training loss: 0.08420988917350769


training:  43%|███████████████████████████████████████████████████▊                                                                    | 392/907 [22:08<29:08,  3.40s/it]

training loss: 0.05901578068733215
training loss: 0.07778580486774445
training loss: 0.06613492965698242


training:  44%|████████████████████████████████████████████████████▎                                                                   | 395/907 [22:18<28:56,  3.39s/it]

training loss: 0.07239718735218048
training loss: 0.07960182428359985
training loss: 0.05842599645256996


training:  44%|████████████████████████████████████████████████████▋                                                                   | 398/907 [22:28<28:43,  3.39s/it]

training loss: 0.04753187671303749
training loss: 0.05143583565950394
training loss: 0.06382714211940765
training loss: 0.07900574058294296


training:  44%|█████████████████████████████████████████████████████                                                                   | 401/907 [22:39<28:45,  3.41s/it]

validation loss: 0.09437404572963715
training loss: 0.14486703276634216
training loss: 0.08515827357769012


training:  45%|█████████████████████████████████████████████████████▍                                                                  | 404/907 [22:49<28:29,  3.40s/it]

training loss: 0.048460353165864944
training loss: 0.07375764101743698
training loss: 0.06138185039162636


training:  45%|█████████████████████████████████████████████████████▊                                                                  | 407/907 [22:59<28:16,  3.39s/it]

training loss: 0.0442684143781662
training loss: 0.07369998097419739
training loss: 0.041963908821344376


training:  45%|██████████████████████████████████████████████████████▏                                                                 | 410/907 [23:09<28:06,  3.39s/it]

training loss: 0.04937862977385521
training loss: 0.07075657695531845
training loss: 0.05152606964111328


training:  46%|██████████████████████████████████████████████████████▋                                                                 | 413/907 [23:19<27:55,  3.39s/it]

training loss: 0.160662442445755
training loss: 0.07714205980300903
training loss: 0.05265031009912491


training:  46%|███████████████████████████████████████████████████████                                                                 | 416/907 [23:29<27:44,  3.39s/it]

training loss: 0.06745486706495285
training loss: 0.04674175754189491
training loss: 0.056201785802841187


training:  46%|███████████████████████████████████████████████████████▍                                                                | 419/907 [23:40<27:32,  3.39s/it]

training loss: 0.06358916312456131
training loss: 0.06194340065121651
training loss: 0.046331290155649185
validation loss: 0.06338772922754288


training:  47%|███████████████████████████████████████████████████████▊                                                                | 422/907 [23:50<27:34,  3.41s/it]

training loss: 0.05044829472899437
training loss: 0.05726442486047745
training loss: 0.053752556443214417


training:  47%|████████████████████████████████████████████████████████▏                                                               | 425/907 [24:00<27:20,  3.40s/it]

training loss: 0.04375486820936203
training loss: 0.0789661630988121
training loss: 0.05698798969388008


training:  47%|████████████████████████████████████████████████████████▋                                                               | 428/907 [24:10<27:05,  3.39s/it]

training loss: 0.07213641703128815
training loss: 0.10632456094026566
training loss: 0.04732702299952507


training:  48%|█████████████████████████████████████████████████████████                                                               | 431/907 [24:20<26:52,  3.39s/it]

training loss: 0.05091726407408714
training loss: 0.052579984068870544
training loss: 0.05975358933210373


training:  48%|█████████████████████████████████████████████████████████▍                                                              | 434/907 [24:31<26:42,  3.39s/it]

training loss: 0.04139293357729912
training loss: 0.08548793196678162
training loss: 0.07574957609176636


training:  48%|█████████████████████████████████████████████████████████▊                                                              | 437/907 [24:41<26:32,  3.39s/it]

training loss: 0.06570401042699814
training loss: 0.061223939061164856
training loss: 0.07293638586997986


training:  49%|██████████████████████████████████████████████████████████▏                                                             | 440/907 [24:51<26:20,  3.39s/it]

training loss: 0.08247841894626617
training loss: 0.06932663917541504
validation loss: 0.04224865138530731
training loss: 0.0696287602186203


training:  49%|██████████████████████████████████████████████████████████▌                                                             | 443/907 [25:01<26:22,  3.41s/it]

training loss: 0.10878503322601318
training loss: 0.07533213496208191
training loss: 0.07103670388460159


training:  49%|███████████████████████████████████████████████████████████                                                             | 446/907 [25:11<26:07,  3.40s/it]

training loss: 0.05981355533003807
training loss: 0.057628195732831955
training loss: 0.07915384322404861


training:  50%|███████████████████████████████████████████████████████████▍                                                            | 449/907 [25:22<25:53,  3.39s/it]

training loss: 0.07670564949512482
training loss: 0.029382901266217232
training loss: 0.05780012160539627


training:  50%|███████████████████████████████████████████████████████████▊                                                            | 452/907 [25:32<25:41,  3.39s/it]

training loss: 0.028264427557587624
training loss: 0.08376504480838776
training loss: 0.03187058866024017


training:  50%|████████████████████████████████████████████████████████████▏                                                           | 455/907 [25:42<25:29,  3.38s/it]

training loss: 0.030570726841688156
training loss: 0.048944905400276184
training loss: 0.04207385703921318


training:  50%|████████████████████████████████████████████████████████████▌                                                           | 458/907 [25:52<25:20,  3.39s/it]

training loss: 0.053833961486816406
training loss: 0.08128628134727478
training loss: 0.08334710448980331
training loss: 0.09254075586795807


training:  51%|████████████████████████████████████████████████████████████▉                                                           | 461/907 [26:02<25:24,  3.42s/it]

validation loss: 0.04365203157067299
training loss: 0.06837380677461624
training loss: 0.06380116194486618


training:  51%|█████████████████████████████████████████████████████████████▍                                                          | 464/907 [26:13<25:09,  3.41s/it]

training loss: 0.05823657661676407
training loss: 0.05362562835216522
training loss: 0.026520907878875732


training:  51%|█████████████████████████████████████████████████████████████▊                                                          | 467/907 [26:23<24:55,  3.40s/it]

training loss: 0.025779375806450844
training loss: 0.08947065472602844
training loss: 0.05070387199521065


training:  52%|██████████████████████████████████████████████████████████████▏                                                         | 470/907 [26:33<24:42,  3.39s/it]

training loss: 0.04169921204447746
training loss: 0.03470088168978691
training loss: 0.0878758504986763


training:  52%|██████████████████████████████████████████████████████████████▌                                                         | 473/907 [26:43<24:29,  3.39s/it]

training loss: 0.06383748352527618
training loss: 0.043406810611486435
training loss: 0.07729039341211319


training:  52%|██████████████████████████████████████████████████████████████▉                                                         | 476/907 [26:53<24:18,  3.38s/it]

training loss: 0.03595327213406563
training loss: 0.09058824926614761
training loss: 0.08371374011039734


training:  53%|███████████████████████████████████████████████████████████████▎                                                        | 479/907 [27:03<24:07,  3.38s/it]

training loss: 0.04835577309131622
training loss: 0.04903754964470863
training loss: 0.05805882811546326
validation loss: 0.05201767385005951


training:  53%|███████████████████████████████████████████████████████████████▊                                                        | 482/907 [27:14<24:10,  3.41s/it]

training loss: 0.061208393424749374
training loss: 0.032622456550598145
training loss: 0.04903462529182434


training:  53%|████████████████████████████████████████████████████████████████▏                                                       | 485/907 [27:24<23:57,  3.41s/it]

training loss: 0.04564899206161499
training loss: 0.08906956762075424
training loss: 0.042759962379932404


training:  54%|████████████████████████████████████████████████████████████████▌                                                       | 488/907 [27:34<23:43,  3.40s/it]

training loss: 0.04661451652646065
training loss: 0.11788982897996902
training loss: 0.03789213299751282


training:  54%|████████████████████████████████████████████████████████████████▉                                                       | 491/907 [27:44<23:31,  3.39s/it]

training loss: 0.05240820348262787
training loss: 0.04507352039217949
training loss: 0.04701375961303711


training:  54%|█████████████████████████████████████████████████████████████████▎                                                      | 494/907 [27:54<23:19,  3.39s/it]

training loss: 0.07086074352264404
training loss: 0.08254045993089676
training loss: 0.05542923882603645


training:  55%|█████████████████████████████████████████████████████████████████▊                                                      | 497/907 [28:04<23:08,  3.39s/it]

training loss: 0.06462661176919937
training loss: 0.03302557393908501
training loss: 0.0499751903116703


training:  55%|██████████████████████████████████████████████████████████████████▏                                                     | 500/907 [28:15<22:57,  3.38s/it]

training loss: 0.05832740664482117
training loss: 0.06162487715482712
validation loss: 0.04064860939979553
training loss: 0.09201568365097046


training:  55%|██████████████████████████████████████████████████████████████████▌                                                     | 503/907 [28:25<22:58,  3.41s/it]

training loss: 0.09282708168029785
training loss: 0.05296221375465393
training loss: 0.07571299374103546


training:  56%|██████████████████████████████████████████████████████████████████▉                                                     | 506/907 [28:35<22:45,  3.41s/it]

training loss: 0.07486417144536972
training loss: 0.062395110726356506
training loss: 0.03726628050208092


training:  56%|███████████████████████████████████████████████████████████████████▎                                                    | 509/907 [28:45<22:32,  3.40s/it]

training loss: 0.03311505913734436
training loss: 0.04616766795516014
training loss: 0.026743408292531967


training:  56%|███████████████████████████████████████████████████████████████████▋                                                    | 512/907 [28:55<22:20,  3.39s/it]

training loss: 0.06254217028617859
training loss: 0.05257948115468025
training loss: 0.06434847414493561


training:  57%|████████████████████████████████████████████████████████████████████▏                                                   | 515/907 [29:06<22:08,  3.39s/it]

training loss: 0.043855372816324234
training loss: 0.08722654730081558
training loss: 0.06422007083892822


training:  57%|████████████████████████████████████████████████████████████████████▌                                                   | 518/907 [29:16<21:56,  3.39s/it]

training loss: 0.09534705430269241
training loss: 0.0408579558134079
training loss: 0.05322525277733803
training loss: 0.08171910047531128


training:  57%|████████████████████████████████████████████████████████████████████▉                                                   | 521/907 [29:26<21:56,  3.41s/it]

validation loss: 0.06473391503095627
training loss: 0.08378209173679352
training loss: 0.07886982709169388


training:  58%|█████████████████████████████████████████████████████████████████████▎                                                  | 524/907 [29:36<21:41,  3.40s/it]

training loss: 0.046335600316524506
training loss: 0.031713418662548065
training loss: 0.07546741515398026


training:  58%|█████████████████████████████████████████████████████████████████████▋                                                  | 527/907 [29:46<21:31,  3.40s/it]

training loss: 0.05642155557870865
training loss: 0.048226628452539444
training loss: 0.0491633303463459


training:  58%|██████████████████████████████████████████████████████████████████████                                                  | 530/907 [29:57<21:21,  3.40s/it]

training loss: 0.06510580331087112
training loss: 0.019082164391875267
training loss: 0.015158643014729023


training:  59%|██████████████████████████████████████████████████████████████████████▌                                                 | 533/907 [30:07<21:08,  3.39s/it]

training loss: 0.12885722517967224
training loss: 0.10026397556066513
training loss: 0.04970598965883255


training:  59%|██████████████████████████████████████████████████████████████████████▉                                                 | 536/907 [30:17<20:57,  3.39s/it]

training loss: 0.05712461471557617
training loss: 0.041400570422410965
training loss: 0.05666438862681389


training:  59%|███████████████████████████████████████████████████████████████████████▎                                                | 539/907 [30:27<20:45,  3.38s/it]

training loss: 0.0779862105846405
training loss: 0.05642976984381676
training loss: 0.05833607539534569
validation loss: 0.055281978100538254


training:  60%|███████████████████████████████████████████████████████████████████████▋                                                | 542/907 [30:37<20:45,  3.41s/it]

training loss: 0.02855665050446987
training loss: 0.047661520540714264
training loss: 0.07901465892791748


training:  60%|████████████████████████████████████████████████████████████████████████                                                | 545/907 [30:48<20:30,  3.40s/it]

training loss: 0.0608169361948967
training loss: 0.04416431486606598
training loss: 0.06334618479013443


training:  60%|████████████████████████████████████████████████████████████████████████▌                                               | 548/907 [30:58<20:17,  3.39s/it]

training loss: 0.09352217614650726
training loss: 0.04311424866318703
training loss: 0.06009815260767937


training:  61%|████████████████████████████████████████████████████████████████████████▉                                               | 551/907 [31:08<20:07,  3.39s/it]

training loss: 0.04331474006175995
training loss: 0.07179991900920868
training loss: 0.04409561678767204


training:  61%|█████████████████████████████████████████████████████████████████████████▎                                              | 554/907 [31:18<19:57,  3.39s/it]

training loss: 0.06187077984213829
training loss: 0.06727983802556992
training loss: 0.06786534190177917


training:  61%|█████████████████████████████████████████████████████████████████████████▋                                              | 557/907 [31:28<19:45,  3.39s/it]

training loss: 0.059946153312921524
training loss: 0.04291801154613495
training loss: 0.06484698504209518


training:  62%|██████████████████████████████████████████████████████████████████████████                                              | 560/907 [31:38<19:34,  3.39s/it]

training loss: 0.050873078405857086
training loss: 0.10048234462738037
validation loss: 0.043129488825798035
training loss: 0.062077391892671585


training:  62%|██████████████████████████████████████████████████████████████████████████▍                                             | 563/907 [31:49<19:34,  3.41s/it]

training loss: 0.04105686768889427
training loss: 0.07055702060461044
training loss: 0.07102719694375992


training:  62%|██████████████████████████████████████████████████████████████████████████▉                                             | 566/907 [31:59<19:21,  3.41s/it]

training loss: 0.044170547276735306
training loss: 0.05482238531112671
training loss: 0.043710172176361084


training:  63%|███████████████████████████████████████████████████████████████████████████▎                                            | 569/907 [32:09<19:08,  3.40s/it]

training loss: 0.0639156848192215
training loss: 0.06208231300115585
training loss: 0.05940210819244385


training:  63%|███████████████████████████████████████████████████████████████████████████▋                                            | 572/907 [32:19<18:56,  3.39s/it]

training loss: 0.08528414368629456
training loss: 0.06317190825939178
training loss: 0.047554004937410355


training:  63%|████████████████████████████████████████████████████████████████████████████                                            | 575/907 [32:29<18:46,  3.39s/it]

training loss: 0.05243224278092384
training loss: 0.04258207604289055
training loss: 0.04084881395101547


training:  64%|████████████████████████████████████████████████████████████████████████████▍                                           | 578/907 [32:40<18:35,  3.39s/it]

training loss: 0.05640319734811783
training loss: 0.07067756354808807
training loss: 0.05251305177807808


training:  64%|████████████████████████████████████████████████████████████████████████████▍                                           | 578/907 [32:50<18:35,  3.39s/it]

training loss: 0.05133742094039917


training:  64%|████████████████████████████████████████████████████████████████████████████▊                                           | 581/907 [32:50<18:33,  3.42s/it]

validation loss: 0.06426765769720078
training loss: 0.0774228647351265
training loss: 0.08315169811248779


training:  64%|█████████████████████████████████████████████████████████████████████████████▎                                          | 584/907 [33:00<18:19,  3.40s/it]

training loss: 0.07695695012807846
training loss: 0.07876196503639221
training loss: 0.07367856800556183


training:  65%|█████████████████████████████████████████████████████████████████████████████▋                                          | 587/907 [33:10<18:07,  3.40s/it]

training loss: 0.04624810069799423
training loss: 0.06534121930599213
training loss: 0.07648338377475739


training:  65%|██████████████████████████████████████████████████████████████████████████████                                          | 590/907 [33:20<17:54,  3.39s/it]

training loss: 0.04400208964943886
training loss: 0.04722917452454567
training loss: 0.07408061623573303


training:  65%|██████████████████████████████████████████████████████████████████████████████▍                                         | 593/907 [33:30<17:43,  3.39s/it]

training loss: 0.042310524731874466
training loss: 0.0860498920083046
training loss: 0.046303827315568924


training:  66%|██████████████████████████████████████████████████████████████████████████████▊                                         | 596/907 [33:41<17:32,  3.38s/it]

training loss: 0.049197420477867126
training loss: 0.08933981508016586
training loss: 0.04219686985015869


training:  66%|███████████████████████████████████████████████████████████████████████████████▎                                        | 599/907 [33:51<17:21,  3.38s/it]

training loss: 0.17475581169128418
training loss: 0.056198541074991226
training loss: 0.04580308869481087
validation loss: 0.05442114174365997


training:  66%|███████████████████████████████████████████████████████████████████████████████▋                                        | 602/907 [34:01<17:21,  3.42s/it]

training loss: 0.07590307295322418
training loss: 0.10219591856002808
training loss: 0.03598898649215698
training loss: 0.07956940680742264


training:  67%|████████████████████████████████████████████████████████████████████████████████▏                                       | 606/907 [34:14<16:48,  3.35s/it]

training loss: 0.03175966814160347
training loss: 0.043502409011125565
training loss: 0.03032505325973034
training loss: 0.03826754540205002


training:  67%|████████████████████████████████████████████████████████████████████████████████▋                                       | 610/907 [34:28<16:37,  3.36s/it]

training loss: 0.0575677864253521
training loss: 0.028469843789935112
training loss: 0.06684781610965729


training:  68%|█████████████████████████████████████████████████████████████████████████████████                                       | 613/907 [34:38<16:29,  3.37s/it]

training loss: 0.04325263202190399
training loss: 0.05772649869322777
training loss: 0.01784312166273594


training:  68%|█████████████████████████████████████████████████████████████████████████████████▍                                      | 616/907 [34:48<16:19,  3.37s/it]

training loss: 0.047972969710826874
training loss: 0.07058262079954147
training loss: 0.08093659579753876


training:  68%|█████████████████████████████████████████████████████████████████████████████████▉                                      | 619/907 [34:58<16:11,  3.37s/it]

training loss: 0.09258156269788742
training loss: 0.0376921109855175
training loss: 0.07430043816566467
validation loss: 0.038186199963092804


training:  69%|██████████████████████████████████████████████████████████████████████████████████▎                                     | 622/907 [35:08<16:10,  3.41s/it]

training loss: 0.06600207835435867
training loss: 0.09827897697687149
training loss: 0.013795396313071251


training:  69%|██████████████████████████████████████████████████████████████████████████████████▋                                     | 625/907 [35:19<15:59,  3.40s/it]

training loss: 0.1614631861448288
training loss: 0.011289977468550205
training loss: 0.08803468942642212


training:  69%|███████████████████████████████████████████████████████████████████████████████████                                     | 628/907 [35:29<15:47,  3.40s/it]

training loss: 0.03495665639638901
training loss: 0.05813921242952347
training loss: 0.014452270232141018


training:  70%|███████████████████████████████████████████████████████████████████████████████████▍                                    | 631/907 [35:39<15:35,  3.39s/it]

training loss: 0.025452319532632828
training loss: 0.10699217766523361
training loss: 0.020312929525971413


training:  70%|███████████████████████████████████████████████████████████████████████████████████▉                                    | 634/907 [35:49<15:24,  3.38s/it]

training loss: 0.011377952061593533
training loss: 0.06058366596698761
training loss: 0.1105828508734703


training:  70%|████████████████████████████████████████████████████████████████████████████████████▎                                   | 637/907 [35:59<15:13,  3.38s/it]

training loss: 0.052409254014492035
training loss: 0.0685088261961937
training loss: 0.0955389142036438


training:  71%|████████████████████████████████████████████████████████████████████████████████████▋                                   | 640/907 [36:09<15:02,  3.38s/it]

training loss: 0.04475093260407448
training loss: 0.04495469108223915
validation loss: 0.057754382491111755
training loss: 0.07055062055587769


training:  71%|█████████████████████████████████████████████████████████████████████████████████████                                   | 643/907 [36:20<14:59,  3.41s/it]

training loss: 0.07601150870323181
training loss: 0.04746390879154205
training loss: 0.07679691165685654


training:  71%|█████████████████████████████████████████████████████████████████████████████████████▍                                  | 646/907 [36:30<14:48,  3.40s/it]

training loss: 0.052516814321279526
training loss: 0.0401667058467865
training loss: 0.11221128702163696


training:  72%|█████████████████████████████████████████████████████████████████████████████████████▊                                  | 649/907 [36:40<14:36,  3.40s/it]

training loss: 0.09108005464076996
training loss: 0.046768493950366974
training loss: 0.040500134229660034


training:  72%|██████████████████████████████████████████████████████████████████████████████████████▎                                 | 652/907 [36:50<14:25,  3.39s/it]

training loss: 0.07451096177101135
training loss: 0.039031025022268295
training loss: 0.08312105387449265


training:  72%|██████████████████████████████████████████████████████████████████████████████████████▋                                 | 655/907 [37:00<14:13,  3.39s/it]

training loss: 0.07822734862565994
training loss: 0.059210922569036484
training loss: 0.07905713468790054


training:  73%|███████████████████████████████████████████████████████████████████████████████████████                                 | 658/907 [37:10<14:03,  3.39s/it]

training loss: 0.058509983122348785
training loss: 0.08854219317436218
training loss: 0.06617502123117447
training loss: 0.07436253875494003


training:  73%|███████████████████████████████████████████████████████████████████████████████████████▍                                | 661/907 [37:21<13:58,  3.41s/it]

validation loss: 0.08182656764984131
training loss: 0.03924510255455971
training loss: 0.05404720455408096


training:  73%|███████████████████████████████████████████████████████████████████████████████████████▊                                | 664/907 [37:31<13:46,  3.40s/it]

training loss: 0.07981029152870178
training loss: 0.06111011654138565
training loss: 0.04569348692893982


training:  74%|████████████████████████████████████████████████████████████████████████████████████████▏                               | 667/907 [37:41<13:34,  3.40s/it]

training loss: 0.05536738783121109
training loss: 0.05389329046010971
training loss: 0.06945661455392838


training:  74%|████████████████████████████████████████████████████████████████████████████████████████▋                               | 670/907 [37:51<13:24,  3.39s/it]

training loss: 0.02985535003244877
training loss: 0.07374777644872665
training loss: 0.03930540382862091


training:  74%|█████████████████████████████████████████████████████████████████████████████████████████                               | 673/907 [38:01<13:14,  3.39s/it]

training loss: 0.06304708123207092
training loss: 0.07948555052280426
training loss: 0.06455882638692856


training:  75%|█████████████████████████████████████████████████████████████████████████████████████████▍                              | 676/907 [38:12<13:03,  3.39s/it]

training loss: 0.06490755081176758
training loss: 0.05326342210173607
training loss: 0.09352664649486542


training:  75%|█████████████████████████████████████████████████████████████████████████████████████████▊                              | 679/907 [38:22<12:52,  3.39s/it]

training loss: 0.05501599237322807
training loss: 0.05565452575683594
training loss: 0.04947162792086601
validation loss: 0.045311905443668365


training:  75%|██████████████████████████████████████████████████████████████████████████████████████████▏                             | 682/907 [38:32<12:47,  3.41s/it]

training loss: 0.03999747708439827
training loss: 0.07718677073717117
training loss: 0.08773800730705261


training:  76%|██████████████████████████████████████████████████████████████████████████████████████████▋                             | 685/907 [38:42<12:34,  3.40s/it]

training loss: 0.06878403574228287
training loss: 0.05485596880316734
training loss: 0.0636269673705101


training:  76%|███████████████████████████████████████████████████████████████████████████████████████████                             | 688/907 [38:52<12:23,  3.39s/it]

training loss: 0.04923275113105774
training loss: 0.07968223094940186
training loss: 0.054722629487514496


training:  76%|███████████████████████████████████████████████████████████████████████████████████████████▍                            | 691/907 [39:03<12:12,  3.39s/it]

training loss: 0.08995215594768524
training loss: 0.03508174419403076
training loss: 0.05929050222039223


training:  77%|███████████████████████████████████████████████████████████████████████████████████████████▊                            | 694/907 [39:13<12:02,  3.39s/it]

training loss: 0.07498180866241455
training loss: 0.10976913571357727
training loss: 0.043301668018102646


training:  77%|████████████████████████████████████████████████████████████████████████████████████████████▏                           | 697/907 [39:23<11:51,  3.39s/it]

training loss: 0.11570360511541367
training loss: 0.05428130924701691
training loss: 0.06530365347862244


training:  77%|████████████████████████████████████████████████████████████████████████████████████████████▌                           | 700/907 [39:33<11:40,  3.39s/it]

training loss: 0.09146276861429214
training loss: 0.06989752501249313
validation loss: 0.047402724623680115
training loss: 0.1018352136015892


training:  78%|█████████████████████████████████████████████████████████████████████████████████████████████                           | 703/907 [39:43<11:35,  3.41s/it]

training loss: 0.06526228785514832
training loss: 0.06219303607940674
training loss: 0.06404120475053787


training:  78%|█████████████████████████████████████████████████████████████████████████████████████████████▍                          | 706/907 [39:54<11:23,  3.40s/it]

training loss: 0.03905690461397171
training loss: 0.051050275564193726
training loss: 0.03952069953083992


training:  78%|█████████████████████████████████████████████████████████████████████████████████████████████▊                          | 709/907 [40:04<11:12,  3.39s/it]

training loss: 0.09147464483976364
training loss: 0.044430483132600784
training loss: 0.05290262773633003


training:  79%|██████████████████████████████████████████████████████████████████████████████████████████████▏                         | 712/907 [40:14<11:00,  3.39s/it]

training loss: 0.02524317242205143
training loss: 0.08382876962423325
training loss: 0.06789233535528183


training:  79%|██████████████████████████████████████████████████████████████████████████████████████████████▌                         | 715/907 [40:24<10:50,  3.39s/it]

training loss: 0.06916563957929611
training loss: 0.046668242663145065
training loss: 0.05309396609663963


training:  79%|██████████████████████████████████████████████████████████████████████████████████████████████▉                         | 718/907 [40:34<10:40,  3.39s/it]

training loss: 0.08620477467775345
training loss: 0.03445175662636757
training loss: 0.03650487959384918
training loss: 0.04047664999961853


training:  79%|███████████████████████████████████████████████████████████████████████████████████████████████▍                        | 721/907 [40:45<10:35,  3.41s/it]

validation loss: 0.05290227755904198
training loss: 0.05609765276312828
training loss: 0.05467194318771362


training:  80%|███████████████████████████████████████████████████████████████████████████████████████████████▊                        | 724/907 [40:55<10:23,  3.40s/it]

training loss: 0.051478058099746704
training loss: 0.05929110571742058
training loss: 0.04212070256471634


training:  80%|████████████████████████████████████████████████████████████████████████████████████████████████▏                       | 727/907 [41:05<10:11,  3.40s/it]

training loss: 0.06803926825523376
training loss: 0.05740097165107727
training loss: 0.04807358235120773


training:  80%|████████████████████████████████████████████████████████████████████████████████████████████████▌                       | 730/907 [41:15<09:59,  3.39s/it]

training loss: 0.059863533824682236
training loss: 0.03620244190096855
training loss: 0.06400644779205322


training:  81%|████████████████████████████████████████████████████████████████████████████████████████████████▉                       | 733/907 [41:25<09:49,  3.39s/it]

training loss: 0.06936991959810257
training loss: 0.0424797460436821
training loss: 0.07063020765781403


training:  81%|█████████████████████████████████████████████████████████████████████████████████████████████████▍                      | 736/907 [41:35<09:38,  3.38s/it]

training loss: 0.04627387225627899
training loss: 0.03158041089773178
training loss: 0.07021136581897736


training:  81%|█████████████████████████████████████████████████████████████████████████████████████████████████▊                      | 739/907 [41:45<09:28,  3.39s/it]

training loss: 0.0389702245593071
training loss: 0.06945426017045975
training loss: 0.08858935534954071
validation loss: 0.059152089059352875


training:  82%|██████████████████████████████████████████████████████████████████████████████████████████████████▏                     | 742/907 [41:56<09:23,  3.42s/it]

training loss: 0.09163138270378113
training loss: 0.07235419750213623
training loss: 0.06547126173973083


training:  82%|██████████████████████████████████████████████████████████████████████████████████████████████████▌                     | 745/907 [42:06<09:11,  3.41s/it]

training loss: 0.07842419296503067
training loss: 0.04994241148233414
training loss: 0.09434371441602707


training:  82%|██████████████████████████████████████████████████████████████████████████████████████████████████▉                     | 748/907 [42:16<09:00,  3.40s/it]

training loss: 0.04419607296586037
training loss: 0.07611896097660065
training loss: 0.0596456304192543


training:  83%|███████████████████████████████████████████████████████████████████████████████████████████████████▎                    | 751/907 [42:26<08:49,  3.39s/it]

training loss: 0.025942804291844368
training loss: 0.08422546088695526
training loss: 0.04927615076303482


training:  83%|███████████████████████████████████████████████████████████████████████████████████████████████████▊                    | 754/907 [42:36<08:38,  3.39s/it]

training loss: 0.04008623957633972
training loss: 0.06237412989139557
training loss: 0.05994036793708801


training:  83%|████████████████████████████████████████████████████████████████████████████████████████████████████▏                   | 757/907 [42:47<08:27,  3.38s/it]

training loss: 0.048703160136938095
training loss: 0.07110846787691116
training loss: 0.03373070806264877


training:  84%|████████████████████████████████████████████████████████████████████████████████████████████████████▌                   | 760/907 [42:57<08:17,  3.38s/it]

training loss: 0.03957360237836838
training loss: 0.0745953842997551
validation loss: 0.04973607137799263
training loss: 0.07040300965309143


training:  84%|████████████████████████████████████████████████████████████████████████████████████████████████████▉                   | 763/907 [43:07<08:12,  3.42s/it]

training loss: 0.06970642507076263
training loss: 0.06591317057609558
training loss: 0.08086995780467987


training:  84%|█████████████████████████████████████████████████████████████████████████████████████████████████████▎                  | 766/907 [43:17<08:01,  3.42s/it]

training loss: 0.03914983198046684
training loss: 0.03958454355597496
training loss: 0.0587288923561573


training:  85%|█████████████████████████████████████████████████████████████████████████████████████████████████████▋                  | 769/907 [43:28<07:50,  3.41s/it]

training loss: 0.07295451313257217
training loss: 0.04575996845960617
training loss: 0.10439328849315643


training:  85%|██████████████████████████████████████████████████████████████████████████████████████████████████████▏                 | 772/907 [43:38<07:38,  3.40s/it]

training loss: 0.07169537246227264
training loss: 0.03144722804427147
training loss: 0.10662569105625153


training:  85%|██████████████████████████████████████████████████████████████████████████████████████████████████████▌                 | 775/907 [43:48<07:27,  3.39s/it]

training loss: 0.06420548260211945
training loss: 0.06995175778865814
training loss: 0.05855906382203102


training:  86%|██████████████████████████████████████████████████████████████████████████████████████████████████████▉                 | 778/907 [43:58<07:17,  3.39s/it]

training loss: 0.05999862775206566
training loss: 0.06620783358812332
training loss: 0.07756049185991287
training loss: 0.04690663889050484


training:  86%|███████████████████████████████████████████████████████████████████████████████████████████████████████▎                | 781/907 [44:08<07:10,  3.41s/it]

validation loss: 0.0527539923787117
training loss: 0.05914252623915672
training loss: 0.060817327350378036


training:  86%|███████████████████████████████████████████████████████████████████████████████████████████████████████▋                | 784/907 [44:19<06:58,  3.40s/it]

training loss: 0.06311928480863571
training loss: 0.054417893290519714
training loss: 0.07523628324270248


training:  87%|████████████████████████████████████████████████████████████████████████████████████████████████████████                | 787/907 [44:29<06:48,  3.40s/it]

training loss: 0.05187908932566643
training loss: 0.08829820156097412
training loss: 0.07808446884155273


training:  87%|████████████████████████████████████████████████████████████████████████████████████████████████████████▌               | 790/907 [44:39<06:37,  3.39s/it]

training loss: 0.06123581528663635
training loss: 0.033781226724386215
training loss: 0.056426357477903366


training:  87%|████████████████████████████████████████████████████████████████████████████████████████████████████████▉               | 793/907 [44:49<06:26,  3.39s/it]

training loss: 0.08431289345026016
training loss: 0.0309950839728117
training loss: 0.11346439272165298


training:  88%|█████████████████████████████████████████████████████████████████████████████████████████████████████████▎              | 796/907 [44:59<06:15,  3.39s/it]

training loss: 0.05502353236079216
training loss: 0.052989546209573746
training loss: 0.07729411870241165


training:  88%|█████████████████████████████████████████████████████████████████████████████████████████████████████████▋              | 799/907 [45:09<06:05,  3.38s/it]

training loss: 0.052408598363399506
training loss: 0.056310705840587616
training loss: 0.08360499888658524
validation loss: 0.08673439174890518


training:  88%|██████████████████████████████████████████████████████████████████████████████████████████████████████████              | 802/907 [45:20<05:57,  3.41s/it]

training loss: 0.068337082862854
training loss: 0.10328888148069382
training loss: 0.04499717056751251


training:  89%|██████████████████████████████████████████████████████████████████████████████████████████████████████████▌             | 805/907 [45:30<05:46,  3.40s/it]

training loss: 0.06669564545154572
training loss: 0.031281039118766785
training loss: 0.04543633759021759


training:  89%|██████████████████████████████████████████████████████████████████████████████████████████████████████████▉             | 808/907 [45:40<05:36,  3.39s/it]

training loss: 0.047983571887016296
training loss: 0.08452006429433823
training loss: 0.045034464448690414


training:  89%|███████████████████████████████████████████████████████████████████████████████████████████████████████████▎            | 811/907 [45:50<05:25,  3.39s/it]

training loss: 0.04743633419275284
training loss: 0.037867121398448944
training loss: 0.05825094133615494


training:  90%|███████████████████████████████████████████████████████████████████████████████████████████████████████████▋            | 814/907 [46:00<05:15,  3.39s/it]

training loss: 0.029768483713269234
training loss: 0.06859193742275238
training loss: 0.04574869945645332


training:  90%|████████████████████████████████████████████████████████████████████████████████████████████████████████████            | 817/907 [46:10<05:05,  3.39s/it]

training loss: 0.03370000049471855
training loss: 0.05261105298995972
training loss: 0.042319681495428085


training:  90%|████████████████████████████████████████████████████████████████████████████████████████████████████████████▍           | 820/907 [46:21<04:54,  3.38s/it]

training loss: 0.0486859567463398
training loss: 0.06329341232776642
validation loss: 0.07820024341344833
training loss: 0.031310632824897766


training:  91%|████████████████████████████████████████████████████████████████████████████████████████████████████████████▉           | 823/907 [46:31<04:46,  3.41s/it]

training loss: 0.05637829378247261
training loss: 0.06469661742448807
training loss: 0.05324599891901016


training:  91%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████▎          | 826/907 [46:41<04:35,  3.40s/it]

training loss: 0.06107243895530701
training loss: 0.07095303386449814
training loss: 0.032217469066381454


training:  91%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████▋          | 829/907 [46:51<04:24,  3.39s/it]

training loss: 0.05003447085618973
training loss: 0.0629952996969223
training loss: 0.07153045386075974


training:  92%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████          | 832/907 [47:01<04:14,  3.39s/it]

training loss: 0.06425808370113373
training loss: 0.0639549195766449
training loss: 0.013712006621062756


training:  92%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████▍         | 835/907 [47:12<04:04,  3.39s/it]

training loss: 0.049155592918395996
training loss: 0.05803035572171211
training loss: 0.11646351218223572


training:  92%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████▊         | 838/907 [47:22<03:53,  3.39s/it]

training loss: 0.07968806475400925
training loss: 0.06285704672336578
training loss: 0.055531322956085205
training loss: 0.042353563010692596


training:  93%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████▎        | 841/907 [47:32<03:45,  3.41s/it]

validation loss: 0.0434979647397995
training loss: 0.039838314056396484
training loss: 0.045137275010347366


training:  93%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████▋        | 844/907 [47:42<03:34,  3.40s/it]

training loss: 0.07742609828710556
training loss: 0.024644749239087105
training loss: 0.05466992408037186


training:  93%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████        | 847/907 [47:52<03:23,  3.40s/it]

training loss: 0.041098132729530334
training loss: 0.053921837359666824
training loss: 0.0761399120092392


training:  94%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍       | 850/907 [48:02<03:13,  3.39s/it]

training loss: 0.04755334556102753
training loss: 0.09382323175668716
training loss: 0.06331279873847961


training:  94%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊       | 853/907 [48:13<03:02,  3.39s/it]

training loss: 0.04977395385503769
training loss: 0.09417010843753815
training loss: 0.05899646133184433


training:  94%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎      | 856/907 [48:23<02:52,  3.39s/it]

training loss: 0.06906025111675262
training loss: 0.04809951037168503
training loss: 0.09420931339263916


training:  95%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋      | 859/907 [48:33<02:42,  3.39s/it]

training loss: 0.059012170881032944
training loss: 0.04493742436170578
training loss: 0.05168302729725838
validation loss: 0.08450839668512344


training:  95%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████      | 862/907 [48:43<02:33,  3.41s/it]

training loss: 0.04518547281622887
training loss: 0.05408064275979996
training loss: 0.07656484097242355


training:  95%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍     | 865/907 [48:54<02:23,  3.41s/it]

training loss: 0.035492971539497375
training loss: 0.049685146659612656
training loss: 0.04858170449733734


training:  96%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊     | 868/907 [49:04<02:12,  3.40s/it]

training loss: 0.043994802981615067
training loss: 0.03830878809094429
training loss: 0.05441468581557274


training:  96%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏    | 871/907 [49:14<02:02,  3.39s/it]

training loss: 0.05959651991724968
training loss: 0.05834396183490753
training loss: 0.10935967415571213


training:  96%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋    | 874/907 [49:24<01:51,  3.38s/it]

training loss: 0.06905621290206909
training loss: 0.06813763827085495
training loss: 0.07603342831134796


training:  97%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████    | 877/907 [49:34<01:41,  3.38s/it]

training loss: 0.0814528837800026
training loss: 0.059251148253679276
training loss: 0.06794261932373047


training:  97%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍   | 880/907 [49:44<01:31,  3.39s/it]

training loss: 0.07302749902009964
training loss: 0.062041815370321274
validation loss: 0.06459450721740723
training loss: 0.07615673542022705


training:  97%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊   | 883/907 [49:55<01:22,  3.42s/it]

training loss: 0.07657676935195923
training loss: 0.06753990054130554
training loss: 0.026947613805532455


training:  98%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏  | 886/907 [50:05<01:11,  3.41s/it]

training loss: 0.040666479617357254
training loss: 0.07026249170303345
training loss: 0.023180866613984108


training:  98%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌  | 889/907 [50:15<01:01,  3.40s/it]

training loss: 0.03466973826289177
training loss: 0.06749856472015381
training loss: 0.06022368371486664


training:  98%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████  | 892/907 [50:25<00:50,  3.39s/it]

training loss: 0.04288370907306671
training loss: 0.07329452782869339
training loss: 0.058466874063014984


training:  99%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 895/907 [50:35<00:40,  3.39s/it]

training loss: 0.050947051495313644
training loss: 0.05724982172250748
training loss: 0.06315208226442337


training:  99%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 898/907 [50:45<00:30,  3.38s/it]

training loss: 0.05138237029314041
training loss: 0.052319128066301346
training loss: 0.052604902535676956
training loss: 0.06061793863773346


training:  99%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏| 901/907 [50:56<00:20,  3.41s/it]

validation loss: 0.0975840762257576
training loss: 0.04732118546962738
training loss: 0.0483684279024601


training: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌| 904/907 [51:06<00:10,  3.40s/it]

training loss: 0.06169186905026436
training loss: 0.05078723654150963
training loss: 0.053761035203933716


training: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 907/907 [51:16<00:00,  3.39s/it]

training loss: 0.09037511795759201





In [19]:
torch.save(model.state_dict(), "model/trial01/saved_model.pth")