In [1]:
%cd drive/My Drive/CS5814/HW3


/content/drive/My Drive/CS5814/HW3


In [2]:
pip install transformers

Collecting transformers
  Downloading transformers-4.18.0-py3-none-any.whl (4.0 MB)
[K     |████████████████████████████████| 4.0 MB 8.8 MB/s 
Collecting huggingface-hub<1.0,>=0.1.0
  Downloading huggingface_hub-0.5.1-py3-none-any.whl (77 kB)
[K     |████████████████████████████████| 77 kB 9.1 MB/s 
Collecting tokenizers!=0.11.3,<0.13,>=0.11.1
  Downloading tokenizers-0.11.6-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (6.5 MB)
[K     |████████████████████████████████| 6.5 MB 62.9 MB/s 
Collecting sacremoses
  Downloading sacremoses-0.0.49-py3-none-any.whl (895 kB)
[K     |████████████████████████████████| 895 kB 64.0 MB/s 
Collecting pyyaml>=5.1
  Downloading PyYAML-6.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (596 kB)
[K     |████████████████████████████████| 596 kB 78.9 MB/s 
Installing collected packages: pyyaml, tokenizers, sacremoses, huggingface-hub, transformers
  Attempting uninstall: pyyaml
    Found ex

In [3]:
import json
import os
import random
import gc

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
from tqdm import tqdm
from transformers import AdamW, AutoTokenizer, AutoModelForSequenceClassification

torch.cuda.empty_cache()
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print(device)


cuda:0


In [4]:
def preprocess_data(file_loc='code_dataset.jsonl', generate_histogram=False):
    """
    Loads and processing the jsonl file,

    :param file_loc: location of target jsonl file
    :param generate_histogram: Flag to display histogram of function lengths
    :return: dataframe of preprocessed jsons
    """

    with open(file_loc, 'r') as json_file:
        json_list = list(json_file)

    code_list = []
    for json_str in json_list:
        result = json.loads(json_str)
        code_list.append(result)

    code_df = pd.DataFrame(code_list)

    total = code_df['target'].sum()
    proportion = total / code_df.shape[0]

    print("Insecure code counts: {}, Total code counts: {}, Proportion {}".format(total, code_df.shape[0], proportion))

    if generate_histogram:
        plt.hist(code_df['func'].str.len(), bins=100)
        plt.show()

    return code_df

In [5]:


def split_data(input_data, attention_data, label_data, train_ratio=0.8, val_ratio=0.10, max_len=512):
    """
    Splits data in accordance with provdied ratios, additionally discards functions with > max_len tokens
        as these will not be processed by the model will (can truncate, yet may truncate the error in the code)

    :param input_data: input functions
    :param attention_data: attention map
    :param label_data: target labels
    :param train_ratio: ratio of data to train on
    :param val_ratio: ratio of data to validate with (test is inferred from this and train)
    :param max_len: max number of tokens allowed for training date

    :return: 3 tuples for train val and test containing (input, attention, target)
    """
    # Removing excessively long elements from dataset
    valid_token_index = [i for i in range(len(input_data)) if len(input_data[i]) <= max_len]
    X_data = np.array(input_data)[valid_token_index]
    A_data = np.array(attention_data)[valid_token_index]
    Y_data = np.array(label_data)[valid_token_index]

    dataset_size = len(X_data)

    # Determining index to split dataset
    random_id = random.sample(range(dataset_size), dataset_size)
    train_split_id = int(train_ratio * dataset_size)
    val_split_id = int((train_ratio + val_ratio) * dataset_size)

    train_ids = random_id[:train_split_id]
    val_ids = random_id[train_split_id:val_split_id]
    test_ids = random_id[val_split_id:]

    X_train = torch.tensor(list(X_data[train_ids]))
    A_train = torch.tensor(list(A_data[train_ids]))
    Y_train = torch.tensor(list(Y_data[train_ids]))

    X_val = torch.tensor(list(X_data[val_ids]))
    A_val = torch.tensor(list(A_data[val_ids]))
    Y_val = torch.tensor(list(Y_data[val_ids]))

    X_test = torch.tensor(list(X_data[test_ids]))
    A_test = torch.tensor(list(A_data[test_ids]))
    Y_test = torch.tensor(list(Y_data[test_ids]))

    return (X_train, A_train, Y_train), (X_val, A_val, Y_val), (X_test, A_test, Y_test)



In [6]:

def tokenize(code_df, model_name='codebert-base'):
    """
    Apply the tokenizer from the huggingface pretrained model

    :param code_df: dataframe of preprocess code (from jsonl)
    :param model_name: model name (targeting local install)
    :return: 3 tuples for train val and test containing (input, attention, target)
    """
    tokenizer = AutoTokenizer.from_pretrained(model_name)

    inputs = tokenizer(code_df['func'].tolist(), truncation=False, padding='max_length')

    input_data = inputs['input_ids']
    attention_data = inputs['attention_mask']
    label_data = torch.tensor(code_df['target'].tolist())  # TODO - this can be directly converted to a np array

    return split_data(input_data, attention_data, label_data, max_len=512)



In [7]:

def train(model, train_data, val_data, epochs=5, batch_size=16, learning_rate=2e-5, validate_per=500,
          run_name="temp", run_descrption=None):
    """
    Main fine-tuning training loop for the provided model

    :param model: model loaded with predefined weights
    :param train_data: tuple of X_train, A_train, Y_train (X = inputs, A = attention, Y = target)
    :param val_data: tuple X_val, A_val, Y_val
    :param epochs: number of epochs for training
    :param batch_size: batch size (see note below about batch_hack)
    :param learning_rate: optimizer learning rate
    :param validate_per: number of weight updates before validation occurs
                            (notes: - if batch_size = 32, and validate_per = 32, validation will occur every batch
                                    - this is wrt the start of each epoch
                                    - validation will always occour at the start of each epoch (step 0))
    :param run_name: name used to saving checkpoints and log files within codebert_finetune_runs
    :param run_descrption: string that is saved to info.txt describing the run


    :return: None (models are saved in checkpoints along with log data)
    """

    # Creating dir to save logs and checkpoints, re
    dir_name = "codebert_finetune_runs/{}".format(run_name)
    if os.path.exists(dir_name):
        print("run name already exists, exiting to prevent overwriting")
        return 0
    else:
        os.makedirs(dir_name)

    # Saving run description.txt
    if run_descrption is not None:
        with open("{}/info.txt".format(dir_name), "a+") as f:
            f.write(run_descrption)

    # Unpacking data
    X_train, A_train, Y_train = train_data
    X_val, A_val, Y_val = val_data

    optimizer = AdamW(model.parameters(), lr=learning_rate)

    batch_hack = batch_size  # See note below regarding limited GPU memory

    # Initializing arrays for tracking loss
    train_loss_hist = []
    val_loss_hist = []

    # Counter to track batches (see note below related to GPU memory)
    batch_count = 0
    # validate_per_batch = int(validate_per/batch_hack)

    # Moving model to GPU if configured
    model = model.to(device)

    validate_per = int(validate_per/batch_size)

    for epoch in range(epochs):

        # Generating random index for manual shuffling of data each epoch as note using DataLoaders
        permutation = torch.randperm(X_train.shape[0])

        # Note here that only a single element is loaded at each iteration (batch size = 1) due to GPU memory constraint
        for batch_id, i in enumerate(range(0, X_train.shape[0], batch_hack)):

            # Loading batch and moving to device
            indices = permutation[i:i + batch_hack]
            batch_X, batch_Y = X_train[indices].to(device), Y_train[indices].to(device), \

            model.train()

            # Forward pass
            outputs = model(batch_X, labels=batch_Y)

            # Backward pass
            loss = outputs.loss
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()

            # Tracking loss
            train_loss_hist.append(float(loss.item()))

            # Training output
            train_output = "Epoch:{} Step:{} Training_loss:{:.6f}".format(epoch, i, loss.item())
            print(train_output+" Training_loss_avg:{:.6f}".format(np.average(train_loss_hist[-50:])))
            with open("{}/train_loss.txt".format(dir_name), "a+") as f:
                f.write(train_output+"\n")

            # Validation
            if batch_id % validate_per == 0:
                val_loss_total = 0
                model.eval()
                print("Validating:")
                for val_badtch_id, j in tqdm(enumerate(range(0, X_val.shape[0], batch_hack))):
                    # Loading singular validation data (overwrites train data as can only load 1 intp GPU)
                    batch_X, batch_Y = X_val[j:j+batch_hack].to(device).reshape(batch_hack, -1), Y_val[j:j+batch_hack].to(device).reshape(batch_hack, -1)
                    with torch.no_grad():
                        val_outputs = model(batch_X, labels=batch_Y)
                    val_loss_total += float(val_outputs['loss'].item())

                    del batch_X
                    del batch_Y

                # Adding average loss to tracker
                val_average = val_loss_total / (val_badtch_id+1)
                val_loss_hist.append(val_average)

                # Validation output and logging
                val_output = "Epoch:{} Step:{} Val_loss:{:.6f}".format(epoch, i, val_average)
                print(val_output)
                with open("{}/val_los.txt".format(dir_name), "a+") as f:
                    f.write(val_output+"\n")

        # End of epoch checkpoint
        model.save_pretrained("{}/epoch_{}".format(dir_name, epoch + 1))



In [None]:
def main():
    """
    Main configuration function for a given finetune run
    :return: None
    """

    run_name = "test_colab_GPU_highRAM_8_val_8"
    model_name = 'codebert-base'
    checkpoint_location = None
    online = False

    code_df = preprocess_data(file_loc='code_dataset.jsonl')
    train_data, val_data, test_data = tokenize(code_df, model_name=model_name)

    # Loading model from checkpoint if location provided
    if online:
        model = AutoModelForSequenceClassification.from_pretrained("microsoft/codebert-base")
    elif checkpoint_location is None:
        model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)
    else:
        model = AutoModelForSequenceClassification.from_pretrained(checkpoint_location)

    train(model=model,
          train_data=train_data,
          val_data=val_data,
          epochs=5,
          batch_size=8,
          learning_rate=2e-5,
          validate_per=100,
          run_name=run_name,
          run_descrption="Colab with highRam, lr=2e-5, validate per 100, batch 16")


In [None]:
gc.collect()
torch.cuda.empty_cache()
main()

Insecure code counts: 3729, Total code counts: 8000, Proportion 0.466125


Some weights of the model checkpoint at codebert-base were not used when initializing RobertaForSequenceClassification: ['pooler.dense.bias', 'pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at codebert-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for p

Epoch:0 Step:0 Training_loss:0.719893 Training_loss_avg:0.719893
Validating:


52it [00:07,  6.59it/s]


Epoch:0 Step:0 Val_loss:0.685511
Epoch:0 Step:8 Training_loss:0.663729 Training_loss_avg:0.691811
Epoch:0 Step:16 Training_loss:0.693459 Training_loss_avg:0.692360
Epoch:0 Step:24 Training_loss:0.685495 Training_loss_avg:0.690644
Epoch:0 Step:32 Training_loss:0.737248 Training_loss_avg:0.699965
Epoch:0 Step:40 Training_loss:0.732875 Training_loss_avg:0.705450
Epoch:0 Step:48 Training_loss:0.617226 Training_loss_avg:0.692846
Epoch:0 Step:56 Training_loss:0.643524 Training_loss_avg:0.686681
Epoch:0 Step:64 Training_loss:0.736142 Training_loss_avg:0.692177
Epoch:0 Step:72 Training_loss:0.783754 Training_loss_avg:0.701334
Epoch:0 Step:80 Training_loss:0.658209 Training_loss_avg:0.697414
Epoch:0 Step:88 Training_loss:0.741494 Training_loss_avg:0.701087
Epoch:0 Step:96 Training_loss:0.536806 Training_loss_avg:0.688450
Validating:


52it [00:07,  6.59it/s]


Epoch:0 Step:96 Val_loss:0.684414
Epoch:0 Step:104 Training_loss:0.557271 Training_loss_avg:0.679080
Epoch:0 Step:112 Training_loss:0.631580 Training_loss_avg:0.675914
Epoch:0 Step:120 Training_loss:0.860519 Training_loss_avg:0.687452
Epoch:0 Step:128 Training_loss:0.690914 Training_loss_avg:0.687655
Epoch:0 Step:136 Training_loss:0.745955 Training_loss_avg:0.690894
Epoch:0 Step:144 Training_loss:0.749100 Training_loss_avg:0.693958
Epoch:0 Step:152 Training_loss:0.760438 Training_loss_avg:0.697282
Epoch:0 Step:160 Training_loss:0.682828 Training_loss_avg:0.696593
Epoch:0 Step:168 Training_loss:0.712765 Training_loss_avg:0.697328
Epoch:0 Step:176 Training_loss:0.669402 Training_loss_avg:0.696114
Epoch:0 Step:184 Training_loss:0.749100 Training_loss_avg:0.698322
Epoch:0 Step:192 Training_loss:0.690476 Training_loss_avg:0.698008
Validating:


52it [00:07,  6.59it/s]


Epoch:0 Step:192 Val_loss:0.687299
Epoch:0 Step:200 Training_loss:0.752611 Training_loss_avg:0.700108
Epoch:0 Step:208 Training_loss:0.647078 Training_loss_avg:0.698144
Epoch:0 Step:216 Training_loss:0.640915 Training_loss_avg:0.696100
Epoch:0 Step:224 Training_loss:0.684021 Training_loss_avg:0.695684
Epoch:0 Step:232 Training_loss:0.706079 Training_loss_avg:0.696030
Epoch:0 Step:240 Training_loss:0.641053 Training_loss_avg:0.694257
Epoch:0 Step:248 Training_loss:0.660586 Training_loss_avg:0.693204
Epoch:0 Step:256 Training_loss:0.696276 Training_loss_avg:0.693298
Epoch:0 Step:264 Training_loss:0.650010 Training_loss_avg:0.692024
Epoch:0 Step:272 Training_loss:0.724021 Training_loss_avg:0.692939
Epoch:0 Step:280 Training_loss:0.572466 Training_loss_avg:0.689592
Epoch:0 Step:288 Training_loss:0.737153 Training_loss_avg:0.690878
Validating:


52it [00:07,  6.59it/s]


Epoch:0 Step:288 Val_loss:0.752170
Epoch:0 Step:296 Training_loss:0.732682 Training_loss_avg:0.691978
Epoch:0 Step:304 Training_loss:0.846411 Training_loss_avg:0.695938
Epoch:0 Step:312 Training_loss:0.738213 Training_loss_avg:0.696994
Epoch:0 Step:320 Training_loss:0.623786 Training_loss_avg:0.695209
Epoch:0 Step:328 Training_loss:0.569308 Training_loss_avg:0.692211
Epoch:0 Step:336 Training_loss:0.691844 Training_loss_avg:0.692203
Epoch:0 Step:344 Training_loss:0.783630 Training_loss_avg:0.694281
Epoch:0 Step:352 Training_loss:0.753503 Training_loss_avg:0.695597
Epoch:0 Step:360 Training_loss:0.623374 Training_loss_avg:0.694027
Epoch:0 Step:368 Training_loss:0.783424 Training_loss_avg:0.695929
Epoch:0 Step:376 Training_loss:0.813172 Training_loss_avg:0.698371
Epoch:0 Step:384 Training_loss:0.733559 Training_loss_avg:0.699089
Validating:


52it [00:07,  6.58it/s]


Epoch:0 Step:384 Val_loss:0.701402
Epoch:0 Step:392 Training_loss:0.676601 Training_loss_avg:0.698640
Epoch:0 Step:400 Training_loss:0.732960 Training_loss_avg:0.698901
Epoch:0 Step:408 Training_loss:0.693916 Training_loss_avg:0.699505
Epoch:0 Step:416 Training_loss:0.686022 Training_loss_avg:0.699356
Epoch:0 Step:424 Training_loss:0.712027 Training_loss_avg:0.699887
Epoch:0 Step:432 Training_loss:0.648816 Training_loss_avg:0.698118
Epoch:0 Step:440 Training_loss:0.692027 Training_loss_avg:0.697301
Epoch:0 Step:448 Training_loss:0.646609 Training_loss_avg:0.697889
Epoch:0 Step:456 Training_loss:0.707567 Training_loss_avg:0.699169
Epoch:0 Step:464 Training_loss:0.604809 Training_loss_avg:0.696543
Epoch:0 Step:472 Training_loss:0.646687 Training_loss_avg:0.693801
Epoch:0 Step:480 Training_loss:0.677372 Training_loss_avg:0.694185
Validating:


52it [00:07,  6.59it/s]


Epoch:0 Step:480 Val_loss:0.684362
Epoch:0 Step:488 Training_loss:0.709784 Training_loss_avg:0.693550
Epoch:0 Step:496 Training_loss:0.736874 Training_loss_avg:0.697552
Epoch:0 Step:504 Training_loss:0.635758 Training_loss_avg:0.699122
Epoch:0 Step:512 Training_loss:0.869815 Training_loss_avg:0.703886
Epoch:0 Step:520 Training_loss:0.940378 Training_loss_avg:0.705483
Epoch:0 Step:528 Training_loss:0.834993 Training_loss_avg:0.708365
Epoch:0 Step:536 Training_loss:0.619964 Training_loss_avg:0.705845
Epoch:0 Step:544 Training_loss:0.615411 Training_loss_avg:0.703171
Epoch:0 Step:552 Training_loss:0.782321 Training_loss_avg:0.703609
Epoch:0 Step:560 Training_loss:0.707531 Training_loss_avg:0.704103
Epoch:0 Step:568 Training_loss:0.697454 Training_loss_avg:0.703797
Epoch:0 Step:576 Training_loss:0.642170 Training_loss_avg:0.703252
Validating:


52it [00:07,  6.59it/s]


Epoch:0 Step:576 Val_loss:0.682627
Epoch:0 Step:584 Training_loss:0.651952 Training_loss_avg:0.701309
Epoch:0 Step:592 Training_loss:0.641997 Training_loss_avg:0.700340
Epoch:0 Step:600 Training_loss:0.689057 Training_loss_avg:0.699069
Epoch:0 Step:608 Training_loss:0.661179 Training_loss_avg:0.699351
Epoch:0 Step:616 Training_loss:0.648385 Training_loss_avg:0.699500
Epoch:0 Step:624 Training_loss:0.660514 Training_loss_avg:0.699030
Epoch:0 Step:632 Training_loss:0.735144 Training_loss_avg:0.699611
Epoch:0 Step:640 Training_loss:0.605898 Training_loss_avg:0.698908
Epoch:0 Step:648 Training_loss:0.585267 Training_loss_avg:0.697402
Epoch:0 Step:656 Training_loss:0.751974 Training_loss_avg:0.698516
Epoch:0 Step:664 Training_loss:0.690899 Training_loss_avg:0.699334
Epoch:0 Step:672 Training_loss:0.672309 Training_loss_avg:0.698299
Validating:


52it [00:07,  6.59it/s]


Epoch:0 Step:672 Val_loss:0.682581
Epoch:0 Step:680 Training_loss:0.702132 Training_loss_avg:0.700893
Epoch:0 Step:688 Training_loss:0.662701 Training_loss_avg:0.699404
Epoch:0 Step:696 Training_loss:0.839451 Training_loss_avg:0.701539
Epoch:0 Step:704 Training_loss:0.682856 Training_loss_avg:0.698268
Epoch:0 Step:712 Training_loss:0.741299 Training_loss_avg:0.698330
Epoch:0 Step:720 Training_loss:0.665278 Training_loss_avg:0.699159
Epoch:0 Step:728 Training_loss:0.565000 Training_loss_avg:0.699073
Epoch:0 Step:736 Training_loss:0.754044 Training_loss_avg:0.700317
Epoch:0 Step:744 Training_loss:0.707396 Training_loss_avg:0.698793
Epoch:0 Step:752 Training_loss:0.753587 Training_loss_avg:0.698794
Epoch:0 Step:760 Training_loss:0.699280 Training_loss_avg:0.700312
Epoch:0 Step:768 Training_loss:0.751233 Training_loss_avg:0.699669
Validating:


52it [00:07,  6.59it/s]


Epoch:0 Step:768 Val_loss:0.683029
Epoch:0 Step:776 Training_loss:0.752943 Training_loss_avg:0.698464
Epoch:0 Step:784 Training_loss:0.707928 Training_loss_avg:0.697951
Epoch:0 Step:792 Training_loss:0.664962 Training_loss_avg:0.697719
Epoch:0 Step:800 Training_loss:0.691302 Training_loss_avg:0.696885
Epoch:0 Step:808 Training_loss:0.698324 Training_loss_avg:0.696974
Epoch:0 Step:816 Training_loss:0.687642 Training_loss_avg:0.697006
Epoch:0 Step:824 Training_loss:0.691316 Training_loss_avg:0.696592
Epoch:0 Step:832 Training_loss:0.745121 Training_loss_avg:0.698518
Epoch:0 Step:840 Training_loss:0.687275 Training_loss_avg:0.698423
Epoch:0 Step:848 Training_loss:0.692260 Training_loss_avg:0.699336
Epoch:0 Step:856 Training_loss:0.680469 Training_loss_avg:0.698794
Epoch:0 Step:864 Training_loss:0.729078 Training_loss_avg:0.701279
Validating:


52it [00:07,  6.59it/s]


Epoch:0 Step:864 Val_loss:0.695163
Epoch:0 Step:872 Training_loss:0.713680 Training_loss_avg:0.702619
Epoch:0 Step:880 Training_loss:0.722710 Training_loss_avg:0.703526
Epoch:0 Step:888 Training_loss:0.705693 Training_loss_avg:0.703444
Epoch:0 Step:896 Training_loss:0.696139 Training_loss_avg:0.702629
Epoch:0 Step:904 Training_loss:0.631343 Training_loss_avg:0.702541
Epoch:0 Step:912 Training_loss:0.684513 Training_loss_avg:0.698835
Epoch:0 Step:920 Training_loss:0.668914 Training_loss_avg:0.693406
Epoch:0 Step:928 Training_loss:0.735910 Training_loss_avg:0.691424
Epoch:0 Step:936 Training_loss:0.630143 Training_loss_avg:0.691628
Epoch:0 Step:944 Training_loss:0.698015 Training_loss_avg:0.693280
Epoch:0 Step:952 Training_loss:0.767509 Training_loss_avg:0.692983
Epoch:0 Step:960 Training_loss:0.689829 Training_loss_avg:0.692629
Validating:


52it [00:07,  6.60it/s]


Epoch:0 Step:960 Val_loss:0.681099
Epoch:0 Step:968 Training_loss:0.716157 Training_loss_avg:0.693004
Epoch:0 Step:976 Training_loss:0.719348 Training_loss_avg:0.694547
Epoch:0 Step:984 Training_loss:0.607395 Training_loss_avg:0.693656
Epoch:0 Step:992 Training_loss:0.613473 Training_loss_avg:0.693085
Epoch:0 Step:1000 Training_loss:0.624129 Training_loss_avg:0.691787
Epoch:0 Step:1008 Training_loss:0.585251 Training_loss_avg:0.690268
Epoch:0 Step:1016 Training_loss:0.602816 Training_loss_avg:0.689357
Epoch:0 Step:1024 Training_loss:0.697109 Training_loss_avg:0.690089
Epoch:0 Step:1032 Training_loss:0.610816 Training_loss_avg:0.687602
Epoch:0 Step:1040 Training_loss:0.624472 Training_loss_avg:0.687974
Epoch:0 Step:1048 Training_loss:0.674315 Training_loss_avg:0.689755
Epoch:0 Step:1056 Training_loss:0.677155 Training_loss_avg:0.688258
Validating:


52it [00:07,  6.60it/s]


Epoch:0 Step:1056 Val_loss:0.691648
Epoch:0 Step:1064 Training_loss:0.900785 Training_loss_avg:0.692456
Epoch:0 Step:1072 Training_loss:0.598928 Training_loss_avg:0.690988
Epoch:0 Step:1080 Training_loss:0.816432 Training_loss_avg:0.693274
Epoch:0 Step:1088 Training_loss:0.565180 Training_loss_avg:0.691324
Epoch:0 Step:1096 Training_loss:0.537722 Training_loss_avg:0.685289
Epoch:0 Step:1104 Training_loss:0.890584 Training_loss_avg:0.689444
Epoch:0 Step:1112 Training_loss:0.522366 Training_loss_avg:0.685065
Epoch:0 Step:1120 Training_loss:0.822218 Training_loss_avg:0.688204
Epoch:0 Step:1128 Training_loss:0.576115 Training_loss_avg:0.688426
Epoch:0 Step:1136 Training_loss:0.720530 Training_loss_avg:0.687756
Epoch:0 Step:1144 Training_loss:0.530303 Training_loss_avg:0.684214
Epoch:0 Step:1152 Training_loss:0.727621 Training_loss_avg:0.683695
Validating:


52it [00:07,  6.59it/s]


Epoch:0 Step:1152 Val_loss:0.684098
Epoch:0 Step:1160 Training_loss:0.607201 Training_loss_avg:0.681853
Epoch:0 Step:1168 Training_loss:0.667918 Training_loss_avg:0.680187
Epoch:0 Step:1176 Training_loss:0.685956 Training_loss_avg:0.678847
Epoch:0 Step:1184 Training_loss:0.561059 Training_loss_avg:0.675910
Epoch:0 Step:1192 Training_loss:0.844858 Training_loss_avg:0.679508
Epoch:0 Step:1200 Training_loss:0.728696 Training_loss_avg:0.680256
Epoch:0 Step:1208 Training_loss:0.798537 Training_loss_avg:0.682260
Epoch:0 Step:1216 Training_loss:0.799540 Training_loss_avg:0.684498
Epoch:0 Step:1224 Training_loss:0.613983 Training_loss_avg:0.682951
Epoch:0 Step:1232 Training_loss:0.729336 Training_loss_avg:0.682636
Epoch:0 Step:1240 Training_loss:0.739738 Training_loss_avg:0.683685
Epoch:0 Step:1248 Training_loss:0.611440 Training_loss_avg:0.682069
Validating:


52it [00:07,  6.59it/s]


Epoch:0 Step:1248 Val_loss:0.681897
Epoch:0 Step:1256 Training_loss:0.749256 Training_loss_avg:0.683444
Epoch:0 Step:1264 Training_loss:0.660027 Training_loss_avg:0.682063
Epoch:0 Step:1272 Training_loss:0.701653 Training_loss_avg:0.681823
Epoch:0 Step:1280 Training_loss:0.747200 Training_loss_avg:0.682313
Epoch:0 Step:1288 Training_loss:0.692541 Training_loss_avg:0.682049
Epoch:0 Step:1296 Training_loss:0.689643 Training_loss_avg:0.681920
Epoch:0 Step:1304 Training_loss:0.709769 Training_loss_avg:0.683488
Epoch:0 Step:1312 Training_loss:0.728944 Training_loss_avg:0.684377
Epoch:0 Step:1320 Training_loss:0.780812 Training_loss_avg:0.686615
Epoch:0 Step:1328 Training_loss:0.670324 Training_loss_avg:0.685303
Epoch:0 Step:1336 Training_loss:0.677432 Training_loss_avg:0.686249
Epoch:0 Step:1344 Training_loss:0.718483 Training_loss_avg:0.686658
Validating:


52it [00:07,  6.59it/s]


Epoch:0 Step:1344 Val_loss:0.691853
Epoch:0 Step:1352 Training_loss:0.689789 Training_loss_avg:0.685104
Epoch:0 Step:1360 Training_loss:0.698289 Training_loss_avg:0.685273
Epoch:0 Step:1368 Training_loss:0.688419 Training_loss_avg:0.684718
Epoch:0 Step:1376 Training_loss:0.714423 Training_loss_avg:0.684620
Epoch:0 Step:1384 Training_loss:0.665737 Training_loss_avg:0.685786
Epoch:0 Step:1392 Training_loss:0.668212 Training_loss_avg:0.686881
Epoch:0 Step:1400 Training_loss:0.697551 Training_loss_avg:0.688350
Epoch:0 Step:1408 Training_loss:0.699460 Training_loss_avg:0.690634
Epoch:0 Step:1416 Training_loss:0.683051 Training_loss_avg:0.692239
Epoch:0 Step:1424 Training_loss:0.806114 Training_loss_avg:0.694419
Epoch:0 Step:1432 Training_loss:0.724778 Training_loss_avg:0.696698
Epoch:0 Step:1440 Training_loss:0.680508 Training_loss_avg:0.697819
Validating:


52it [00:07,  6.59it/s]


Epoch:0 Step:1440 Val_loss:0.697800
Epoch:0 Step:1448 Training_loss:0.724214 Training_loss_avg:0.698817
Epoch:0 Step:1456 Training_loss:0.691185 Training_loss_avg:0.699097
Epoch:0 Step:1464 Training_loss:0.704328 Training_loss_avg:0.695168
Epoch:0 Step:1472 Training_loss:0.742348 Training_loss_avg:0.698036
Epoch:0 Step:1480 Training_loss:0.681901 Training_loss_avg:0.695346
Epoch:0 Step:1488 Training_loss:0.711714 Training_loss_avg:0.698276
Epoch:0 Step:1496 Training_loss:0.717900 Training_loss_avg:0.701880
Epoch:0 Step:1504 Training_loss:0.753908 Training_loss_avg:0.699147
Epoch:0 Step:1512 Training_loss:0.724141 Training_loss_avg:0.703182
Epoch:0 Step:1520 Training_loss:0.689250 Training_loss_avg:0.700523
Epoch:0 Step:1528 Training_loss:0.684490 Training_loss_avg:0.702690
Epoch:0 Step:1536 Training_loss:0.661344 Training_loss_avg:0.701506
Validating:


52it [00:07,  6.59it/s]


Epoch:0 Step:1536 Val_loss:0.689469
Epoch:0 Step:1544 Training_loss:0.701554 Training_loss_avg:0.704931
Epoch:0 Step:1552 Training_loss:0.669605 Training_loss_avg:0.703771
Epoch:0 Step:1560 Training_loss:0.707349 Training_loss_avg:0.705774
Epoch:0 Step:1568 Training_loss:0.657212 Training_loss_avg:0.705560
Epoch:0 Step:1576 Training_loss:0.695680 Training_loss_avg:0.705754
Epoch:0 Step:1584 Training_loss:0.693326 Training_loss_avg:0.708400
Epoch:0 Step:1592 Training_loss:0.694824 Training_loss_avg:0.705399
Epoch:0 Step:1600 Training_loss:0.726641 Training_loss_avg:0.705358
Epoch:0 Step:1608 Training_loss:0.692250 Training_loss_avg:0.703232
Epoch:0 Step:1616 Training_loss:0.665498 Training_loss_avg:0.700551
Epoch:0 Step:1624 Training_loss:0.713337 Training_loss_avg:0.702539
Epoch:0 Step:1632 Training_loss:0.696347 Training_loss_avg:0.701879
Validating:


52it [00:07,  6.60it/s]


Epoch:0 Step:1632 Val_loss:0.693541
Epoch:0 Step:1640 Training_loss:0.686749 Training_loss_avg:0.700819
Epoch:0 Step:1648 Training_loss:0.678544 Training_loss_avg:0.702161
Epoch:0 Step:1656 Training_loss:0.673857 Training_loss_avg:0.700653
Epoch:0 Step:1664 Training_loss:0.720515 Training_loss_avg:0.701863
Epoch:0 Step:1672 Training_loss:0.660423 Training_loss_avg:0.701038
Epoch:0 Step:1680 Training_loss:0.672743 Training_loss_avg:0.699549
Epoch:0 Step:1688 Training_loss:0.701831 Training_loss_avg:0.699735
Epoch:0 Step:1696 Training_loss:0.665977 Training_loss_avg:0.699262
Epoch:0 Step:1704 Training_loss:0.725117 Training_loss_avg:0.699569
Epoch:0 Step:1712 Training_loss:0.623520 Training_loss_avg:0.697460
Epoch:0 Step:1720 Training_loss:0.798544 Training_loss_avg:0.697815
Epoch:0 Step:1728 Training_loss:0.669560 Training_loss_avg:0.697799
Validating:


52it [00:07,  6.59it/s]


Epoch:0 Step:1728 Val_loss:0.680716
Epoch:0 Step:1736 Training_loss:0.648055 Training_loss_avg:0.697212
Epoch:0 Step:1744 Training_loss:0.684597 Training_loss_avg:0.696534
Epoch:0 Step:1752 Training_loss:0.663034 Training_loss_avg:0.695999
Epoch:0 Step:1760 Training_loss:0.740220 Training_loss_avg:0.696838
Epoch:0 Step:1768 Training_loss:0.689117 Training_loss_avg:0.696852
Epoch:0 Step:1776 Training_loss:0.558086 Training_loss_avg:0.693725
Epoch:0 Step:1784 Training_loss:0.575441 Training_loss_avg:0.691919
Epoch:0 Step:1792 Training_loss:0.543556 Training_loss_avg:0.689426
Epoch:0 Step:1800 Training_loss:0.599217 Training_loss_avg:0.687459
Epoch:0 Step:1808 Training_loss:0.815667 Training_loss_avg:0.689783
Epoch:0 Step:1816 Training_loss:0.882281 Training_loss_avg:0.693768
Epoch:0 Step:1824 Training_loss:0.767074 Training_loss_avg:0.692987
Validating:


52it [00:07,  6.59it/s]


Epoch:0 Step:1824 Val_loss:0.683835
Epoch:0 Step:1832 Training_loss:0.664421 Training_loss_avg:0.691780
Epoch:0 Step:1840 Training_loss:0.816240 Training_loss_avg:0.694495
Epoch:0 Step:1848 Training_loss:0.725781 Training_loss_avg:0.694526
Epoch:0 Step:1856 Training_loss:0.722342 Training_loss_avg:0.695149
Epoch:0 Step:1864 Training_loss:0.649077 Training_loss_avg:0.694044
Epoch:0 Step:1872 Training_loss:0.692244 Training_loss_avg:0.693042
Epoch:0 Step:1880 Training_loss:0.732843 Training_loss_avg:0.694061
Epoch:0 Step:1888 Training_loss:0.642335 Training_loss_avg:0.692673
Epoch:0 Step:1896 Training_loss:0.823769 Training_loss_avg:0.694791
Epoch:0 Step:1904 Training_loss:0.755580 Training_loss_avg:0.694824
Epoch:0 Step:1912 Training_loss:0.666955 Training_loss_avg:0.693680
Epoch:0 Step:1920 Training_loss:0.752012 Training_loss_avg:0.694936
Validating:


52it [00:07,  6.60it/s]


Epoch:0 Step:1920 Val_loss:0.684151
Epoch:0 Step:1928 Training_loss:0.666534 Training_loss_avg:0.694577
Epoch:0 Step:1936 Training_loss:0.697158 Training_loss_avg:0.695293
Epoch:0 Step:1944 Training_loss:0.686305 Training_loss_avg:0.694988
Epoch:0 Step:1952 Training_loss:0.686507 Training_loss_avg:0.695326
Epoch:0 Step:1960 Training_loss:0.692740 Training_loss_avg:0.695034
Epoch:0 Step:1968 Training_loss:0.722492 Training_loss_avg:0.696339
Epoch:0 Step:1976 Training_loss:0.688060 Training_loss_avg:0.696187
Epoch:0 Step:1984 Training_loss:0.701006 Training_loss_avg:0.696340
Epoch:0 Step:1992 Training_loss:0.739715 Training_loss_avg:0.697238
Epoch:0 Step:2000 Training_loss:0.651183 Training_loss_avg:0.695729
Epoch:0 Step:2008 Training_loss:0.680499 Training_loss_avg:0.695494
Epoch:0 Step:2016 Training_loss:0.698995 Training_loss_avg:0.696164
Validating:


52it [00:07,  6.59it/s]


Epoch:0 Step:2016 Val_loss:0.685729
Epoch:0 Step:2024 Training_loss:0.658257 Training_loss_avg:0.695062
Epoch:0 Step:2032 Training_loss:0.694969 Training_loss_avg:0.695035
Epoch:0 Step:2040 Training_loss:0.722763 Training_loss_avg:0.695755
Epoch:0 Step:2048 Training_loss:0.701693 Training_loss_avg:0.696218
Epoch:0 Step:2056 Training_loss:0.648673 Training_loss_avg:0.695714
Epoch:0 Step:2064 Training_loss:0.698102 Training_loss_avg:0.695266
Epoch:0 Step:2072 Training_loss:0.696395 Training_loss_avg:0.695986
Epoch:0 Step:2080 Training_loss:0.654940 Training_loss_avg:0.695630
Epoch:0 Step:2088 Training_loss:0.734291 Training_loss_avg:0.696279
Epoch:0 Step:2096 Training_loss:0.696556 Training_loss_avg:0.696890
Epoch:0 Step:2104 Training_loss:0.687033 Training_loss_avg:0.696129
Epoch:0 Step:2112 Training_loss:0.663117 Training_loss_avg:0.696921
Validating:


52it [00:07,  6.59it/s]


Epoch:0 Step:2112 Val_loss:0.684066
Epoch:0 Step:2120 Training_loss:0.654229 Training_loss_avg:0.694034
Epoch:0 Step:2128 Training_loss:0.596167 Training_loss_avg:0.692566
Epoch:0 Step:2136 Training_loss:0.699137 Training_loss_avg:0.693588
Epoch:0 Step:2144 Training_loss:0.754870 Training_loss_avg:0.694994
Epoch:0 Step:2152 Training_loss:0.707162 Training_loss_avg:0.695876
Epoch:0 Step:2160 Training_loss:0.630843 Training_loss_avg:0.693689
Epoch:0 Step:2168 Training_loss:0.630886 Training_loss_avg:0.692524
Epoch:0 Step:2176 Training_loss:0.740322 Training_loss_avg:0.696169
Epoch:0 Step:2184 Training_loss:0.674276 Training_loss_avg:0.698145
Epoch:0 Step:2192 Training_loss:0.742000 Training_loss_avg:0.702114
Epoch:0 Step:2200 Training_loss:0.682634 Training_loss_avg:0.703783
Epoch:0 Step:2208 Training_loss:0.729691 Training_loss_avg:0.702063
Validating:


52it [00:07,  6.59it/s]


Epoch:0 Step:2208 Val_loss:0.680777
Epoch:0 Step:2216 Training_loss:0.735640 Training_loss_avg:0.699130
Epoch:0 Step:2224 Training_loss:0.628749 Training_loss_avg:0.696364
Epoch:0 Step:2232 Training_loss:0.703300 Training_loss_avg:0.697141
Epoch:0 Step:2240 Training_loss:0.718731 Training_loss_avg:0.695191
Epoch:0 Step:2248 Training_loss:0.739062 Training_loss_avg:0.695457
Epoch:0 Step:2256 Training_loss:0.683501 Training_loss_avg:0.694680
Epoch:0 Step:2264 Training_loss:0.752535 Training_loss_avg:0.696749
Epoch:0 Step:2272 Training_loss:0.648570 Training_loss_avg:0.695876
Epoch:0 Step:2280 Training_loss:0.582202 Training_loss_avg:0.692863
Epoch:0 Step:2288 Training_loss:0.680894 Training_loss_avg:0.693634
Epoch:0 Step:2296 Training_loss:0.654146 Training_loss_avg:0.690242
Epoch:0 Step:2304 Training_loss:0.650453 Training_loss_avg:0.688139
Validating:


52it [00:07,  6.60it/s]


Epoch:0 Step:2304 Val_loss:0.680651
Epoch:0 Step:2312 Training_loss:0.769413 Training_loss_avg:0.690188
Epoch:0 Step:2320 Training_loss:0.631527 Training_loss_avg:0.687778
Epoch:0 Step:2328 Training_loss:0.679491 Training_loss_avg:0.688038
Epoch:0 Step:2336 Training_loss:0.681793 Training_loss_avg:0.687730
Epoch:0 Step:2344 Training_loss:0.725653 Training_loss_avg:0.688517
Epoch:0 Step:2352 Training_loss:0.759398 Training_loss_avg:0.689975
Epoch:0 Step:2360 Training_loss:0.625124 Training_loss_avg:0.688623
Epoch:0 Step:2368 Training_loss:0.606759 Training_loss_avg:0.686308
Epoch:0 Step:2376 Training_loss:0.700716 Training_loss_avg:0.686561
Epoch:0 Step:2384 Training_loss:0.682510 Training_loss_avg:0.686191
Epoch:0 Step:2392 Training_loss:0.622747 Training_loss_avg:0.683852
Epoch:0 Step:2400 Training_loss:0.637077 Training_loss_avg:0.683570
Validating:


52it [00:07,  6.59it/s]


Epoch:0 Step:2400 Val_loss:0.681490
Epoch:0 Step:2408 Training_loss:0.643514 Training_loss_avg:0.682830
Epoch:0 Step:2416 Training_loss:0.617648 Training_loss_avg:0.681203
Epoch:0 Step:2424 Training_loss:0.778111 Training_loss_avg:0.683600
Epoch:0 Step:2432 Training_loss:0.710646 Training_loss_avg:0.683914
Epoch:0 Step:2440 Training_loss:0.822656 Training_loss_avg:0.685912
Epoch:0 Step:2448 Training_loss:0.591392 Training_loss_avg:0.683706
Epoch:0 Step:2456 Training_loss:0.677307 Training_loss_avg:0.684278
Epoch:0 Step:2464 Training_loss:0.667087 Training_loss_avg:0.683658
Epoch:0 Step:2472 Training_loss:0.676256 Training_loss_avg:0.683255
Epoch:0 Step:2480 Training_loss:0.596309 Training_loss_avg:0.682083
Epoch:0 Step:2488 Training_loss:0.704488 Training_loss_avg:0.681487
Epoch:0 Step:2496 Training_loss:0.847263 Training_loss_avg:0.684501
Validating:


52it [00:07,  6.59it/s]


Epoch:0 Step:2496 Val_loss:0.683403
Epoch:0 Step:2504 Training_loss:0.580427 Training_loss_avg:0.682369
Epoch:0 Step:2512 Training_loss:0.702326 Training_loss_avg:0.683153
Epoch:0 Step:2520 Training_loss:0.794235 Training_loss_avg:0.685953
Epoch:0 Step:2528 Training_loss:0.701636 Training_loss_avg:0.688062
Epoch:0 Step:2536 Training_loss:0.694524 Training_loss_avg:0.687970
Epoch:0 Step:2544 Training_loss:0.768806 Training_loss_avg:0.688249
Epoch:0 Step:2552 Training_loss:0.596572 Training_loss_avg:0.686037
Epoch:0 Step:2560 Training_loss:0.602139 Training_loss_avg:0.685463
Epoch:0 Step:2568 Training_loss:0.620825 Training_loss_avg:0.685262
Epoch:0 Step:2576 Training_loss:0.541929 Training_loss_avg:0.681294
Epoch:0 Step:2584 Training_loss:0.713192 Training_loss_avg:0.682072
Epoch:0 Step:2592 Training_loss:0.726732 Training_loss_avg:0.681767
Validating:


52it [00:07,  6.60it/s]


Epoch:0 Step:2592 Val_loss:0.682601
Epoch:0 Step:2600 Training_loss:0.669137 Training_loss_avg:0.681497
Epoch:0 Step:2608 Training_loss:0.753764 Training_loss_avg:0.681978
Epoch:0 Step:2616 Training_loss:0.691303 Training_loss_avg:0.681092
Epoch:0 Step:2624 Training_loss:0.694330 Training_loss_avg:0.682403
Epoch:0 Step:2632 Training_loss:0.743614 Training_loss_avg:0.683209
Epoch:0 Step:2640 Training_loss:0.737750 Training_loss_avg:0.683590
Epoch:0 Step:2648 Training_loss:0.760695 Training_loss_avg:0.684022
Epoch:0 Step:2656 Training_loss:0.597168 Training_loss_avg:0.682296
Epoch:0 Step:2664 Training_loss:0.669056 Training_loss_avg:0.680626
Epoch:0 Step:2672 Training_loss:0.697575 Training_loss_avg:0.681606
Epoch:0 Step:2680 Training_loss:0.584993 Training_loss_avg:0.681662
Epoch:0 Step:2688 Training_loss:0.737143 Training_loss_avg:0.682787
Validating:


52it [00:07,  6.59it/s]


Epoch:0 Step:2688 Val_loss:0.682291
Epoch:0 Step:2696 Training_loss:0.673256 Training_loss_avg:0.683169
Epoch:0 Step:2704 Training_loss:0.683766 Training_loss_avg:0.683836
Epoch:0 Step:2712 Training_loss:0.683814 Training_loss_avg:0.682124
Epoch:0 Step:2720 Training_loss:0.649712 Training_loss_avg:0.682487
Epoch:0 Step:2728 Training_loss:0.736261 Training_loss_avg:0.683623
Epoch:0 Step:2736 Training_loss:0.618240 Training_loss_avg:0.682352
Epoch:0 Step:2744 Training_loss:0.650520 Training_loss_avg:0.680849
Epoch:0 Step:2752 Training_loss:0.767551 Training_loss_avg:0.681012
Epoch:0 Step:2760 Training_loss:0.765740 Training_loss_avg:0.683824
Epoch:0 Step:2768 Training_loss:0.620132 Training_loss_avg:0.684092
Epoch:0 Step:2776 Training_loss:0.641056 Training_loss_avg:0.682899
Epoch:0 Step:2784 Training_loss:0.680612 Training_loss_avg:0.682861
Validating:


52it [00:07,  6.59it/s]


Epoch:0 Step:2784 Val_loss:0.682011
Epoch:0 Step:2792 Training_loss:0.665176 Training_loss_avg:0.683709
Epoch:0 Step:2800 Training_loss:0.705015 Training_loss_avg:0.685068
Epoch:0 Step:2808 Training_loss:0.603600 Training_loss_avg:0.684270
Epoch:0 Step:2816 Training_loss:0.827540 Training_loss_avg:0.688468
Epoch:0 Step:2824 Training_loss:0.708970 Training_loss_avg:0.687085
Epoch:0 Step:2832 Training_loss:0.649019 Training_loss_avg:0.685852
Epoch:0 Step:2840 Training_loss:0.672086 Training_loss_avg:0.682841
Epoch:0 Step:2848 Training_loss:0.663366 Training_loss_avg:0.684280
Epoch:0 Step:2856 Training_loss:0.627889 Training_loss_avg:0.683292
Epoch:0 Step:2864 Training_loss:0.757550 Training_loss_avg:0.685101
Epoch:0 Step:2872 Training_loss:0.613294 Training_loss_avg:0.683842
Epoch:0 Step:2880 Training_loss:0.829516 Training_loss_avg:0.688506
Validating:


52it [00:07,  6.59it/s]


Epoch:0 Step:2880 Val_loss:0.681952
Epoch:0 Step:2888 Training_loss:0.712650 Training_loss_avg:0.688669
Epoch:0 Step:2896 Training_loss:0.726463 Training_loss_avg:0.686253
Epoch:0 Step:2904 Training_loss:0.746964 Training_loss_avg:0.689584
Epoch:0 Step:2912 Training_loss:0.735331 Training_loss_avg:0.690244
Epoch:0 Step:2920 Training_loss:0.663857 Training_loss_avg:0.687637
Epoch:0 Step:2928 Training_loss:0.693444 Training_loss_avg:0.687473
Epoch:0 Step:2936 Training_loss:0.688239 Training_loss_avg:0.687347
Epoch:0 Step:2944 Training_loss:0.682914 Training_loss_avg:0.685629
Epoch:0 Step:2952 Training_loss:0.634363 Training_loss_avg:0.686385
Epoch:0 Step:2960 Training_loss:0.678958 Training_loss_avg:0.687921
Epoch:0 Step:2968 Training_loss:0.724811 Training_loss_avg:0.690001
Epoch:0 Step:2976 Training_loss:0.685048 Training_loss_avg:0.692864
Validating:


52it [00:07,  6.59it/s]


Epoch:0 Step:2976 Val_loss:0.683713
Epoch:0 Step:2984 Training_loss:0.750361 Training_loss_avg:0.693607
Epoch:0 Step:2992 Training_loss:0.660278 Training_loss_avg:0.692278
Epoch:0 Step:3000 Training_loss:0.772083 Training_loss_avg:0.694337
Epoch:0 Step:3008 Training_loss:0.700657 Training_loss_avg:0.693275
Epoch:0 Step:3016 Training_loss:0.672764 Training_loss_avg:0.692904
Epoch:0 Step:3024 Training_loss:0.660113 Training_loss_avg:0.692220
Epoch:0 Step:3032 Training_loss:0.677810 Training_loss_avg:0.690903
Epoch:0 Step:3040 Training_loss:0.718259 Training_loss_avg:0.690514
Epoch:0 Step:3048 Training_loss:0.703770 Training_loss_avg:0.689375
Epoch:0 Step:3056 Training_loss:0.729513 Training_loss_avg:0.692022
Epoch:0 Step:3064 Training_loss:0.688084 Training_loss_avg:0.692403
Epoch:0 Step:3072 Training_loss:0.709602 Training_loss_avg:0.692643
Validating:


52it [00:07,  6.59it/s]


Epoch:0 Step:3072 Val_loss:0.695127
Epoch:0 Step:3080 Training_loss:0.662152 Training_loss_avg:0.694186
Epoch:0 Step:3088 Training_loss:0.677152 Training_loss_avg:0.692986
Epoch:0 Step:3096 Training_loss:0.657813 Training_loss_avg:0.692678
Epoch:0 Step:3104 Training_loss:0.710238 Training_loss_avg:0.693207
Epoch:0 Step:3112 Training_loss:0.675987 Training_loss_avg:0.693050
Epoch:0 Step:3120 Training_loss:0.653057 Training_loss_avg:0.693117
Epoch:0 Step:3128 Training_loss:0.698086 Training_loss_avg:0.692354
Epoch:0 Step:3136 Training_loss:0.693296 Training_loss_avg:0.693855
Epoch:0 Step:3144 Training_loss:0.656631 Training_loss_avg:0.693977
Epoch:0 Step:3152 Training_loss:0.714377 Training_loss_avg:0.692914
Epoch:0 Step:3160 Training_loss:0.645554 Training_loss_avg:0.690510
Epoch:0 Step:3168 Training_loss:0.706379 Training_loss_avg:0.692235
Validating:


52it [00:07,  6.60it/s]


Epoch:0 Step:3168 Val_loss:0.687131
Epoch:0 Step:3176 Training_loss:0.664194 Training_loss_avg:0.692698
Epoch:0 Step:3184 Training_loss:0.676203 Training_loss_avg:0.692610
Epoch:0 Step:3192 Training_loss:0.675990 Training_loss_avg:0.692826
Epoch:0 Step:3200 Training_loss:0.714036 Training_loss_avg:0.693006
Epoch:0 Step:3208 Training_loss:0.685369 Training_loss_avg:0.694642
Epoch:0 Step:3216 Training_loss:0.742222 Training_loss_avg:0.692935
Epoch:0 Step:3224 Training_loss:0.664741 Training_loss_avg:0.692051
Epoch:0 Step:3232 Training_loss:0.656134 Training_loss_avg:0.692193
Epoch:0 Step:3240 Training_loss:0.648058 Training_loss_avg:0.691712
Epoch:0 Step:3248 Training_loss:0.703495 Training_loss_avg:0.692515
Epoch:0 Step:3256 Training_loss:0.667275 Training_loss_avg:0.693303
Epoch:0 Step:3264 Training_loss:0.643031 Training_loss_avg:0.691012
Validating:


52it [00:07,  6.60it/s]


Epoch:0 Step:3264 Val_loss:0.689603
Epoch:0 Step:3272 Training_loss:0.725997 Training_loss_avg:0.693266
Epoch:0 Step:3280 Training_loss:0.667941 Training_loss_avg:0.690035
Epoch:0 Step:3288 Training_loss:0.763620 Training_loss_avg:0.691054
Epoch:0 Step:3296 Training_loss:0.694665 Training_loss_avg:0.690418
Epoch:0 Step:3304 Training_loss:0.710841 Training_loss_avg:0.689696
Epoch:0 Step:3312 Training_loss:0.680373 Training_loss_avg:0.688597
Epoch:0 Step:3320 Training_loss:0.701544 Training_loss_avg:0.689350
Epoch:0 Step:3328 Training_loss:0.840576 Training_loss_avg:0.692293
Epoch:1 Step:0 Training_loss:0.718702 Training_loss_avg:0.692902
Validating:


52it [00:07,  6.60it/s]


Epoch:1 Step:0 Val_loss:0.693456
Epoch:1 Step:8 Training_loss:0.672553 Training_loss_avg:0.692695
Epoch:1 Step:16 Training_loss:0.675025 Training_loss_avg:0.693508
Epoch:1 Step:24 Training_loss:0.690081 Training_loss_avg:0.693731
Epoch:1 Step:32 Training_loss:0.693537 Training_loss_avg:0.693105
Epoch:1 Step:40 Training_loss:0.692446 Training_loss_avg:0.693253
Epoch:1 Step:48 Training_loss:0.712092 Training_loss_avg:0.692488
Epoch:1 Step:56 Training_loss:0.723872 Training_loss_avg:0.693760
Epoch:1 Step:64 Training_loss:0.718474 Training_loss_avg:0.692688
Epoch:1 Step:72 Training_loss:0.724532 Training_loss_avg:0.693165
Epoch:1 Step:80 Training_loss:0.687809 Training_loss_avg:0.693466
Epoch:1 Step:88 Training_loss:0.704678 Training_loss_avg:0.694357
Epoch:1 Step:96 Training_loss:0.664376 Training_loss_avg:0.694089
Validating:


52it [00:07,  6.58it/s]


Epoch:1 Step:96 Val_loss:0.693416
Epoch:1 Step:104 Training_loss:0.676224 Training_loss_avg:0.693248
Epoch:1 Step:112 Training_loss:0.691772 Training_loss_avg:0.693008
Epoch:1 Step:120 Training_loss:0.715994 Training_loss_avg:0.692738
Epoch:1 Step:128 Training_loss:0.686622 Training_loss_avg:0.692708
Epoch:1 Step:136 Training_loss:0.731580 Training_loss_avg:0.693148
Epoch:1 Step:144 Training_loss:0.696675 Training_loss_avg:0.693838
Epoch:1 Step:152 Training_loss:0.674124 Training_loss_avg:0.693778
Epoch:1 Step:160 Training_loss:0.720253 Training_loss_avg:0.695027
Epoch:1 Step:168 Training_loss:0.696363 Training_loss_avg:0.694749
Epoch:1 Step:176 Training_loss:0.721813 Training_loss_avg:0.695666
Epoch:1 Step:184 Training_loss:0.690779 Training_loss_avg:0.696420
Epoch:1 Step:192 Training_loss:0.712754 Training_loss_avg:0.696713
Validating:


52it [00:07,  6.59it/s]


Epoch:1 Step:192 Val_loss:0.685487
Epoch:1 Step:200 Training_loss:0.683558 Training_loss_avg:0.696519
Epoch:1 Step:208 Training_loss:0.673776 Training_loss_avg:0.696862
Epoch:1 Step:216 Training_loss:0.682714 Training_loss_avg:0.696228
Epoch:1 Step:224 Training_loss:0.735165 Training_loss_avg:0.698021
Epoch:1 Step:232 Training_loss:0.634132 Training_loss_avg:0.696576
Epoch:1 Step:240 Training_loss:0.693726 Training_loss_avg:0.697166
Epoch:1 Step:248 Training_loss:0.682985 Training_loss_avg:0.697302
Epoch:1 Step:256 Training_loss:0.681081 Training_loss_avg:0.697404
Epoch:1 Step:264 Training_loss:0.711015 Training_loss_avg:0.697343
Epoch:1 Step:272 Training_loss:0.734569 Training_loss_avg:0.698327
Epoch:1 Step:280 Training_loss:0.753550 Training_loss_avg:0.698554
Epoch:1 Step:288 Training_loss:0.708144 Training_loss_avg:0.699422
Validating:


52it [00:07,  6.60it/s]


Epoch:1 Step:288 Val_loss:0.684863
Epoch:1 Step:296 Training_loss:0.683361 Training_loss_avg:0.699966
Epoch:1 Step:304 Training_loss:0.682788 Training_loss_avg:0.700661
Epoch:1 Step:312 Training_loss:0.711915 Training_loss_avg:0.700829
Epoch:1 Step:320 Training_loss:0.668555 Training_loss_avg:0.700855
Epoch:1 Step:328 Training_loss:0.662217 Training_loss_avg:0.701239
Epoch:1 Step:336 Training_loss:0.716113 Training_loss_avg:0.701041
Epoch:1 Step:344 Training_loss:0.720258 Training_loss_avg:0.702087
Epoch:1 Step:352 Training_loss:0.752034 Training_loss_avg:0.701856
Epoch:1 Step:360 Training_loss:0.669388 Training_loss_avg:0.701350
Epoch:1 Step:368 Training_loss:0.647870 Training_loss_avg:0.700091
Epoch:1 Step:376 Training_loss:0.656889 Training_loss_avg:0.699621
Epoch:1 Step:384 Training_loss:0.671865 Training_loss_avg:0.699028
Validating:


52it [00:07,  6.60it/s]


Epoch:1 Step:384 Val_loss:0.684529
Epoch:1 Step:392 Training_loss:0.713625 Training_loss_avg:0.696488
Epoch:1 Step:400 Training_loss:0.653571 Training_loss_avg:0.695186
Epoch:1 Step:408 Training_loss:0.661518 Training_loss_avg:0.694965
Epoch:1 Step:416 Training_loss:0.704211 Training_loss_avg:0.695549
Epoch:1 Step:424 Training_loss:0.694786 Training_loss_avg:0.695643
Epoch:1 Step:432 Training_loss:0.722048 Training_loss_avg:0.696213
Epoch:1 Step:440 Training_loss:0.636392 Training_loss_avg:0.695092
Epoch:1 Step:448 Training_loss:0.600986 Training_loss_avg:0.692870
Epoch:1 Step:456 Training_loss:0.647742 Training_loss_avg:0.691347
Epoch:1 Step:464 Training_loss:0.785068 Training_loss_avg:0.692679
Epoch:1 Step:472 Training_loss:0.646175 Training_loss_avg:0.691112
Epoch:1 Step:480 Training_loss:0.658344 Training_loss_avg:0.690523
Validating:


52it [00:07,  6.59it/s]


Epoch:1 Step:480 Val_loss:0.683318
Epoch:1 Step:488 Training_loss:0.656991 Training_loss_avg:0.689569
Epoch:1 Step:496 Training_loss:0.767528 Training_loss_avg:0.691632
Epoch:1 Step:504 Training_loss:0.733905 Training_loss_avg:0.692786
Epoch:1 Step:512 Training_loss:0.659441 Training_loss_avg:0.692139
Epoch:1 Step:520 Training_loss:0.655210 Training_loss_avg:0.690923
Epoch:1 Step:528 Training_loss:0.590532 Training_loss_avg:0.689002
Epoch:1 Step:536 Training_loss:0.662467 Training_loss_avg:0.687619
Epoch:1 Step:544 Training_loss:0.632087 Training_loss_avg:0.686328
Epoch:1 Step:552 Training_loss:0.703631 Training_loss_avg:0.686918
Epoch:1 Step:560 Training_loss:0.799479 Training_loss_avg:0.688502
Epoch:1 Step:568 Training_loss:0.537200 Training_loss_avg:0.685319
Epoch:1 Step:576 Training_loss:0.675534 Training_loss_avg:0.684393
Validating:


52it [00:07,  6.60it/s]


Epoch:1 Step:576 Val_loss:0.688256
Epoch:1 Step:584 Training_loss:0.687447 Training_loss_avg:0.684327
Epoch:1 Step:592 Training_loss:0.721704 Training_loss_avg:0.684506
Epoch:1 Step:600 Training_loss:0.743936 Training_loss_avg:0.685713
Epoch:1 Step:608 Training_loss:0.811100 Training_loss_avg:0.688460
Epoch:1 Step:616 Training_loss:0.681750 Training_loss_avg:0.688441
Epoch:1 Step:624 Training_loss:0.753089 Training_loss_avg:0.688799
Epoch:1 Step:632 Training_loss:0.536021 Training_loss_avg:0.686837
Epoch:1 Step:640 Training_loss:0.722873 Training_loss_avg:0.687420
Epoch:1 Step:648 Training_loss:0.649236 Training_loss_avg:0.686745
Epoch:1 Step:656 Training_loss:0.812807 Training_loss_avg:0.689379
Epoch:1 Step:664 Training_loss:0.725123 Training_loss_avg:0.689662
Epoch:1 Step:672 Training_loss:0.625168 Training_loss_avg:0.687473
Validating:


52it [00:07,  6.60it/s]


Epoch:1 Step:672 Val_loss:0.682689
Epoch:1 Step:680 Training_loss:0.601948 Training_loss_avg:0.684441
Epoch:1 Step:688 Training_loss:0.792734 Training_loss_avg:0.686133
Epoch:1 Step:696 Training_loss:0.736417 Training_loss_avg:0.687194
Epoch:1 Step:704 Training_loss:0.714858 Training_loss_avg:0.687836
Epoch:1 Step:712 Training_loss:0.593064 Training_loss_avg:0.685459
Epoch:1 Step:720 Training_loss:0.686673 Training_loss_avg:0.685821
Epoch:1 Step:728 Training_loss:0.758013 Training_loss_avg:0.687737
Epoch:1 Step:736 Training_loss:0.671154 Training_loss_avg:0.686838
Epoch:1 Step:744 Training_loss:0.687592 Training_loss_avg:0.686184
Epoch:1 Step:752 Training_loss:0.622924 Training_loss_avg:0.683602
Epoch:1 Step:760 Training_loss:0.663647 Training_loss_avg:0.683487
Epoch:1 Step:768 Training_loss:0.724151 Training_loss_avg:0.685013
Validating:


52it [00:07,  6.59it/s]


Epoch:1 Step:768 Val_loss:0.682033
Epoch:1 Step:776 Training_loss:0.703092 Training_loss_avg:0.685937
Epoch:1 Step:784 Training_loss:0.721932 Training_loss_avg:0.686938
Epoch:1 Step:792 Training_loss:0.732914 Training_loss_avg:0.687324
Epoch:1 Step:800 Training_loss:0.700254 Training_loss_avg:0.688258
Epoch:1 Step:808 Training_loss:0.698478 Training_loss_avg:0.688997
Epoch:1 Step:816 Training_loss:0.623820 Training_loss_avg:0.687389
Epoch:1 Step:824 Training_loss:0.692420 Training_loss_avg:0.687342
Epoch:1 Step:832 Training_loss:0.725546 Training_loss_avg:0.687412
Epoch:1 Step:840 Training_loss:0.771565 Training_loss_avg:0.690115
Epoch:1 Step:848 Training_loss:0.729677 Training_loss_avg:0.692689
Epoch:1 Step:856 Training_loss:0.647418 Training_loss_avg:0.692683
Epoch:1 Step:864 Training_loss:0.704117 Training_loss_avg:0.691064
Validating:


52it [00:07,  6.60it/s]


Epoch:1 Step:864 Val_loss:0.685094
Epoch:1 Step:872 Training_loss:0.680494 Training_loss_avg:0.691750
Epoch:1 Step:880 Training_loss:0.685172 Training_loss_avg:0.692287
Epoch:1 Step:888 Training_loss:0.663264 Training_loss_avg:0.692412
Epoch:1 Step:896 Training_loss:0.743844 Training_loss_avg:0.691938
Epoch:1 Step:904 Training_loss:0.770943 Training_loss_avg:0.692679
Epoch:1 Step:912 Training_loss:0.713710 Training_loss_avg:0.693765
Epoch:1 Step:920 Training_loss:0.697311 Training_loss_avg:0.694607
Epoch:1 Step:928 Training_loss:0.693264 Training_loss_avg:0.696661
Epoch:1 Step:936 Training_loss:0.709652 Training_loss_avg:0.697605
Epoch:1 Step:944 Training_loss:0.659589 Training_loss_avg:0.698155
Epoch:1 Step:952 Training_loss:0.661650 Training_loss_avg:0.697315
Epoch:1 Step:960 Training_loss:0.665726 Training_loss_avg:0.694640
Validating:


52it [00:07,  6.59it/s]


Epoch:1 Step:960 Val_loss:0.687138
Epoch:1 Step:968 Training_loss:0.661450 Training_loss_avg:0.697125
Epoch:1 Step:976 Training_loss:0.675656 Training_loss_avg:0.697128
Epoch:1 Step:984 Training_loss:0.683368 Training_loss_avg:0.697046
Epoch:1 Step:992 Training_loss:0.670696 Training_loss_avg:0.696026
Epoch:1 Step:1000 Training_loss:0.713371 Training_loss_avg:0.695415
Epoch:1 Step:1008 Training_loss:0.668991 Training_loss_avg:0.692573
Epoch:1 Step:1016 Training_loss:0.657342 Training_loss_avg:0.692084
Epoch:1 Step:1024 Training_loss:0.613809 Training_loss_avg:0.689299
Epoch:1 Step:1032 Training_loss:0.644500 Training_loss_avg:0.691468
Epoch:1 Step:1040 Training_loss:0.670116 Training_loss_avg:0.690413
Epoch:1 Step:1048 Training_loss:0.685172 Training_loss_avg:0.691132
Epoch:1 Step:1056 Training_loss:0.815271 Training_loss_avg:0.691181
Validating:


52it [00:07,  6.60it/s]


Epoch:1 Step:1056 Val_loss:0.682164
Epoch:1 Step:1064 Training_loss:0.670176 Training_loss_avg:0.690082
Epoch:1 Step:1072 Training_loss:0.703673 Training_loss_avg:0.691652
Epoch:1 Step:1080 Training_loss:0.765222 Training_loss_avg:0.694918
Epoch:1 Step:1088 Training_loss:0.685348 Training_loss_avg:0.692770
Epoch:1 Step:1096 Training_loss:0.733070 Training_loss_avg:0.692703
Epoch:1 Step:1104 Training_loss:0.607036 Training_loss_avg:0.690547
Epoch:1 Step:1112 Training_loss:0.685449 Training_loss_avg:0.692394
Epoch:1 Step:1120 Training_loss:0.640407 Training_loss_avg:0.691469
Epoch:1 Step:1128 Training_loss:0.718783 Training_loss_avg:0.690685
Epoch:1 Step:1136 Training_loss:0.669383 Training_loss_avg:0.690649
Epoch:1 Step:1144 Training_loss:0.587795 Training_loss_avg:0.688653
Epoch:1 Step:1152 Training_loss:0.725071 Training_loss_avg:0.690696
Validating:


52it [00:07,  6.59it/s]


Epoch:1 Step:1152 Val_loss:0.682016
Epoch:1 Step:1160 Training_loss:0.673173 Training_loss_avg:0.690887
Epoch:1 Step:1168 Training_loss:0.690063 Training_loss_avg:0.690205
Epoch:1 Step:1176 Training_loss:0.677649 Training_loss_avg:0.689696
Epoch:1 Step:1184 Training_loss:0.610267 Training_loss_avg:0.687463
Epoch:1 Step:1192 Training_loss:0.663860 Training_loss_avg:0.686082
Epoch:1 Step:1200 Training_loss:0.770338 Training_loss_avg:0.687483
Epoch:1 Step:1208 Training_loss:0.686541 Training_loss_avg:0.687245
Epoch:1 Step:1216 Training_loss:0.750771 Training_loss_avg:0.689784
Epoch:1 Step:1224 Training_loss:0.669945 Training_loss_avg:0.689334
Epoch:1 Step:1232 Training_loss:0.726770 Training_loss_avg:0.689359
Epoch:1 Step:1240 Training_loss:0.704533 Training_loss_avg:0.688018
Epoch:1 Step:1248 Training_loss:0.743485 Training_loss_avg:0.688294
Validating:


52it [00:07,  6.60it/s]


Epoch:1 Step:1248 Val_loss:0.681536
Epoch:1 Step:1256 Training_loss:0.716905 Training_loss_avg:0.689684
Epoch:1 Step:1264 Training_loss:0.601712 Training_loss_avg:0.687636
Epoch:1 Step:1272 Training_loss:0.697440 Training_loss_avg:0.687975
Epoch:1 Step:1280 Training_loss:0.618671 Training_loss_avg:0.686645
Epoch:1 Step:1288 Training_loss:0.646639 Training_loss_avg:0.686312
Epoch:1 Step:1296 Training_loss:0.621349 Training_loss_avg:0.683862
Epoch:1 Step:1304 Training_loss:0.639290 Training_loss_avg:0.681229
Epoch:1 Step:1312 Training_loss:0.648468 Training_loss_avg:0.679924
Epoch:1 Step:1320 Training_loss:0.799070 Training_loss_avg:0.681960
Epoch:1 Step:1328 Training_loss:0.785808 Training_loss_avg:0.683810
Epoch:1 Step:1336 Training_loss:0.722382 Training_loss_avg:0.684065
Epoch:1 Step:1344 Training_loss:0.669950 Training_loss_avg:0.684272
Validating:


52it [00:07,  6.59it/s]


Epoch:1 Step:1344 Val_loss:0.681717
Epoch:1 Step:1352 Training_loss:0.673280 Training_loss_avg:0.684505
Epoch:1 Step:1360 Training_loss:0.765710 Training_loss_avg:0.686505
Epoch:1 Step:1368 Training_loss:0.688458 Training_loss_avg:0.687045
Epoch:1 Step:1376 Training_loss:0.672043 Training_loss_avg:0.686972
Epoch:1 Step:1384 Training_loss:0.664092 Training_loss_avg:0.686587
Epoch:1 Step:1392 Training_loss:0.709109 Training_loss_avg:0.687355
Epoch:1 Step:1400 Training_loss:0.700480 Training_loss_avg:0.687097
Epoch:1 Step:1408 Training_loss:0.719697 Training_loss_avg:0.688111
Epoch:1 Step:1416 Training_loss:0.827130 Training_loss_avg:0.691507
Epoch:1 Step:1424 Training_loss:0.753662 Training_loss_avg:0.694304
Epoch:1 Step:1432 Training_loss:0.711456 Training_loss_avg:0.695643
Epoch:1 Step:1440 Training_loss:0.675241 Training_loss_avg:0.695746
Validating:


52it [00:07,  6.59it/s]


Epoch:1 Step:1440 Val_loss:0.682774
Epoch:1 Step:1448 Training_loss:0.725454 Training_loss_avg:0.696552
Epoch:1 Step:1456 Training_loss:0.717488 Training_loss_avg:0.694596
Epoch:1 Step:1464 Training_loss:0.699017 Training_loss_avg:0.695173
Epoch:1 Step:1472 Training_loss:0.673832 Training_loss_avg:0.694576
Epoch:1 Step:1480 Training_loss:0.682249 Training_loss_avg:0.692916
Epoch:1 Step:1488 Training_loss:0.653116 Training_loss_avg:0.692272
Epoch:1 Step:1496 Training_loss:0.721446 Training_loss_avg:0.692039
Epoch:1 Step:1504 Training_loss:0.687627 Training_loss_avg:0.693651
Epoch:1 Step:1512 Training_loss:0.679108 Training_loss_avg:0.693524
Epoch:1 Step:1520 Training_loss:0.695114 Training_loss_avg:0.694618
Epoch:1 Step:1528 Training_loss:0.732474 Training_loss_avg:0.694892
Epoch:1 Step:1536 Training_loss:0.675966 Training_loss_avg:0.695024
Validating:


52it [00:07,  6.59it/s]


Epoch:1 Step:1536 Val_loss:0.687355
Epoch:1 Step:1544 Training_loss:0.706541 Training_loss_avg:0.697399
Epoch:1 Step:1552 Training_loss:0.699517 Training_loss_avg:0.696888
Epoch:1 Step:1560 Training_loss:0.695072 Training_loss_avg:0.697326
Epoch:1 Step:1568 Training_loss:0.701185 Training_loss_avg:0.697548
Epoch:1 Step:1576 Training_loss:0.712708 Training_loss_avg:0.698249
Epoch:1 Step:1584 Training_loss:0.709284 Training_loss_avg:0.700230
Epoch:1 Step:1592 Training_loss:0.712429 Training_loss_avg:0.701201
Epoch:1 Step:1600 Training_loss:0.658013 Training_loss_avg:0.698954
Epoch:1 Step:1608 Training_loss:0.671096 Training_loss_avg:0.698646
Epoch:1 Step:1616 Training_loss:0.687290 Training_loss_avg:0.697376
Epoch:1 Step:1624 Training_loss:0.725792 Training_loss_avg:0.698493
Epoch:1 Step:1632 Training_loss:0.642587 Training_loss_avg:0.696809
Validating:


52it [00:07,  6.60it/s]


Epoch:1 Step:1632 Val_loss:0.690875
Epoch:1 Step:1640 Training_loss:0.684949 Training_loss_avg:0.696418
Epoch:1 Step:1648 Training_loss:0.723735 Training_loss_avg:0.696023
Epoch:1 Step:1656 Training_loss:0.686471 Training_loss_avg:0.695414
Epoch:1 Step:1664 Training_loss:0.701613 Training_loss_avg:0.697412
Epoch:1 Step:1672 Training_loss:0.699688 Training_loss_avg:0.697457
Epoch:1 Step:1680 Training_loss:0.664423 Training_loss_avg:0.698372
Epoch:1 Step:1688 Training_loss:0.723167 Training_loss_avg:0.699902
Epoch:1 Step:1696 Training_loss:0.685313 Training_loss_avg:0.701182
Epoch:1 Step:1704 Training_loss:0.678039 Training_loss_avg:0.701957
Epoch:1 Step:1712 Training_loss:0.691085 Training_loss_avg:0.702809
Epoch:1 Step:1720 Training_loss:0.693135 Training_loss_avg:0.700690
Epoch:1 Step:1728 Training_loss:0.719034 Training_loss_avg:0.699355
Validating:


52it [00:07,  6.60it/s]


Epoch:1 Step:1728 Val_loss:0.693292
Epoch:1 Step:1736 Training_loss:0.759120 Training_loss_avg:0.700090
Epoch:1 Step:1744 Training_loss:0.705564 Training_loss_avg:0.700802
Epoch:1 Step:1752 Training_loss:0.685400 Training_loss_avg:0.701044
Epoch:1 Step:1760 Training_loss:0.695483 Training_loss_avg:0.699640
Epoch:1 Step:1768 Training_loss:0.728003 Training_loss_avg:0.700431
Epoch:1 Step:1776 Training_loss:0.709865 Training_loss_avg:0.701187
Epoch:1 Step:1784 Training_loss:0.688439 Training_loss_avg:0.701674
Epoch:1 Step:1792 Training_loss:0.657264 Training_loss_avg:0.700637
Epoch:1 Step:1800 Training_loss:0.638326 Training_loss_avg:0.699394
Epoch:1 Step:1808 Training_loss:0.675112 Training_loss_avg:0.698502
Epoch:1 Step:1816 Training_loss:0.649917 Training_loss_avg:0.694958
Epoch:1 Step:1824 Training_loss:0.779288 Training_loss_avg:0.695471
Validating:


52it [00:07,  6.59it/s]


Epoch:1 Step:1824 Val_loss:0.680187
Epoch:1 Step:1832 Training_loss:0.790367 Training_loss_avg:0.697049
Epoch:1 Step:1840 Training_loss:0.753618 Training_loss_avg:0.698616
Epoch:1 Step:1848 Training_loss:0.663219 Training_loss_avg:0.697372
Epoch:1 Step:1856 Training_loss:0.679511 Training_loss_avg:0.696612
Epoch:1 Step:1864 Training_loss:0.670346 Training_loss_avg:0.696039
Epoch:1 Step:1872 Training_loss:0.646798 Training_loss_avg:0.695498
Epoch:1 Step:1880 Training_loss:0.727254 Training_loss_avg:0.696398
Epoch:1 Step:1888 Training_loss:0.659991 Training_loss_avg:0.696536
Epoch:1 Step:1896 Training_loss:0.638474 Training_loss_avg:0.694876
Epoch:1 Step:1904 Training_loss:0.702314 Training_loss_avg:0.695170
Epoch:1 Step:1912 Training_loss:0.678542 Training_loss_avg:0.695159
Epoch:1 Step:1920 Training_loss:0.719600 Training_loss_avg:0.695648
Validating:


52it [00:07,  6.59it/s]


Epoch:1 Step:1920 Val_loss:0.680373
Epoch:1 Step:1928 Training_loss:0.625377 Training_loss_avg:0.693506
Epoch:1 Step:1936 Training_loss:0.758682 Training_loss_avg:0.695161
Epoch:1 Step:1944 Training_loss:0.738907 Training_loss_avg:0.695808
Epoch:1 Step:1952 Training_loss:0.720291 Training_loss_avg:0.696224
Epoch:1 Step:1960 Training_loss:0.713319 Training_loss_avg:0.696588
Epoch:1 Step:1968 Training_loss:0.655792 Training_loss_avg:0.695681
Epoch:1 Step:1976 Training_loss:0.693479 Training_loss_avg:0.695296
Epoch:1 Step:1984 Training_loss:0.613126 Training_loss_avg:0.693373
Epoch:1 Step:1992 Training_loss:0.744023 Training_loss_avg:0.694005
Epoch:1 Step:2000 Training_loss:0.609754 Training_loss_avg:0.693040
Epoch:1 Step:2008 Training_loss:0.716937 Training_loss_avg:0.693956
Epoch:1 Step:2016 Training_loss:0.704586 Training_loss_avg:0.694302
Validating:


52it [00:07,  6.60it/s]


Epoch:1 Step:2016 Val_loss:0.681122
Epoch:1 Step:2024 Training_loss:0.682290 Training_loss_avg:0.693432
Epoch:1 Step:2032 Training_loss:0.695504 Training_loss_avg:0.694491
Epoch:1 Step:2040 Training_loss:0.748936 Training_loss_avg:0.695770
Epoch:1 Step:2048 Training_loss:0.629094 Training_loss_avg:0.693878
Epoch:1 Step:2056 Training_loss:0.728304 Training_loss_avg:0.694714
Epoch:1 Step:2064 Training_loss:0.697584 Training_loss_avg:0.694634
Epoch:1 Step:2072 Training_loss:0.699086 Training_loss_avg:0.694622
Epoch:1 Step:2080 Training_loss:0.711305 Training_loss_avg:0.695559
Epoch:1 Step:2088 Training_loss:0.665062 Training_loss_avg:0.694397
Epoch:1 Step:2096 Training_loss:0.697979 Training_loss_avg:0.694650
Epoch:1 Step:2104 Training_loss:0.681320 Training_loss_avg:0.694716
Epoch:1 Step:2112 Training_loss:0.711983 Training_loss_avg:0.695134
Validating:


52it [00:07,  6.59it/s]


Epoch:1 Step:2112 Val_loss:0.686512
Epoch:1 Step:2120 Training_loss:0.665006 Training_loss_avg:0.694571
Epoch:1 Step:2128 Training_loss:0.701651 Training_loss_avg:0.694224
Epoch:1 Step:2136 Training_loss:0.674877 Training_loss_avg:0.692539
Epoch:1 Step:2144 Training_loss:0.707454 Training_loss_avg:0.692577
Epoch:1 Step:2152 Training_loss:0.722178 Training_loss_avg:0.693312
Epoch:1 Step:2160 Training_loss:0.709367 Training_loss_avg:0.693590
Epoch:1 Step:2168 Training_loss:0.685827 Training_loss_avg:0.692746
Epoch:1 Step:2176 Training_loss:0.685943 Training_loss_avg:0.692268
Epoch:1 Step:2184 Training_loss:0.678308 Training_loss_avg:0.692065
Epoch:1 Step:2192 Training_loss:0.663203 Training_loss_avg:0.692184
Epoch:1 Step:2200 Training_loss:0.692214 Training_loss_avg:0.693262
Epoch:1 Step:2208 Training_loss:0.727605 Training_loss_avg:0.694312
Validating:


52it [00:07,  6.60it/s]


Epoch:1 Step:2208 Val_loss:0.684441
Epoch:1 Step:2216 Training_loss:0.673539 Training_loss_avg:0.694784
Epoch:1 Step:2224 Training_loss:0.715207 Training_loss_avg:0.693503
Epoch:1 Step:2232 Training_loss:0.691568 Training_loss_avg:0.691527
Epoch:1 Step:2240 Training_loss:0.685082 Training_loss_avg:0.690156
Epoch:1 Step:2248 Training_loss:0.685340 Training_loss_avg:0.690598
Epoch:1 Step:2256 Training_loss:0.658068 Training_loss_avg:0.690169
Epoch:1 Step:2264 Training_loss:0.671089 Training_loss_avg:0.690184
Epoch:1 Step:2272 Training_loss:0.672658 Training_loss_avg:0.690702
Epoch:1 Step:2280 Training_loss:0.660854 Training_loss_avg:0.689374
Epoch:1 Step:2288 Training_loss:0.650375 Training_loss_avg:0.689181
Epoch:1 Step:2296 Training_loss:0.637893 Training_loss_avg:0.689170
Epoch:1 Step:2304 Training_loss:0.708622 Training_loss_avg:0.689296
Validating:


52it [00:07,  6.59it/s]


Epoch:1 Step:2304 Val_loss:0.679572
Epoch:1 Step:2312 Training_loss:0.716428 Training_loss_avg:0.690053
Epoch:1 Step:2320 Training_loss:0.695124 Training_loss_avg:0.689564
Epoch:1 Step:2328 Training_loss:0.634279 Training_loss_avg:0.689742
Epoch:1 Step:2336 Training_loss:0.745922 Training_loss_avg:0.689487
Epoch:1 Step:2344 Training_loss:0.667989 Training_loss_avg:0.688068
Epoch:1 Step:2352 Training_loss:0.745254 Training_loss_avg:0.688568
Epoch:1 Step:2360 Training_loss:0.698787 Training_loss_avg:0.688277
Epoch:1 Step:2368 Training_loss:0.609412 Training_loss_avg:0.687349
Epoch:1 Step:2376 Training_loss:0.681735 Training_loss_avg:0.687115
Epoch:1 Step:2384 Training_loss:0.705835 Training_loss_avg:0.688969
Epoch:1 Step:2392 Training_loss:0.826620 Training_loss_avg:0.690621
Epoch:1 Step:2400 Training_loss:0.709856 Training_loss_avg:0.692623
Validating:


52it [00:07,  6.59it/s]


Epoch:1 Step:2400 Val_loss:0.679353
Epoch:1 Step:2408 Training_loss:0.650647 Training_loss_avg:0.691297
Epoch:1 Step:2416 Training_loss:0.680938 Training_loss_avg:0.690824
Epoch:1 Step:2424 Training_loss:0.655028 Training_loss_avg:0.690279
Epoch:1 Step:2432 Training_loss:0.622962 Training_loss_avg:0.688828
Epoch:1 Step:2440 Training_loss:0.692000 Training_loss_avg:0.687689
Epoch:1 Step:2448 Training_loss:0.670912 Training_loss_avg:0.688526
Epoch:1 Step:2456 Training_loss:0.777631 Training_loss_avg:0.689512
Epoch:1 Step:2464 Training_loss:0.734240 Training_loss_avg:0.690245
Epoch:1 Step:2472 Training_loss:0.715990 Training_loss_avg:0.690583
Epoch:1 Step:2480 Training_loss:0.689963 Training_loss_avg:0.690156
Epoch:1 Step:2488 Training_loss:0.659889 Training_loss_avg:0.690053
Epoch:1 Step:2496 Training_loss:0.676886 Training_loss_avg:0.689631
Validating:


52it [00:07,  6.59it/s]


Epoch:1 Step:2496 Val_loss:0.679794
Epoch:1 Step:2504 Training_loss:0.707778 Training_loss_avg:0.690160
Epoch:1 Step:2512 Training_loss:0.791822 Training_loss_avg:0.691757
Epoch:1 Step:2520 Training_loss:0.723894 Training_loss_avg:0.692935
Epoch:1 Step:2528 Training_loss:0.694856 Training_loss_avg:0.692799
Epoch:1 Step:2536 Training_loss:0.724953 Training_loss_avg:0.693800
Epoch:1 Step:2544 Training_loss:0.680304 Training_loss_avg:0.693257
Epoch:1 Step:2552 Training_loss:0.714566 Training_loss_avg:0.693105
Epoch:1 Step:2560 Training_loss:0.729285 Training_loss_avg:0.693504
Epoch:1 Step:2568 Training_loss:0.686018 Training_loss_avg:0.693507
Epoch:1 Step:2576 Training_loss:0.651088 Training_loss_avg:0.692810
Epoch:1 Step:2584 Training_loss:0.692021 Training_loss_avg:0.693085
Epoch:1 Step:2592 Training_loss:0.696493 Training_loss_avg:0.693750
Validating:


52it [00:07,  6.59it/s]


Epoch:1 Step:2592 Val_loss:0.684031
Epoch:1 Step:2600 Training_loss:0.684525 Training_loss_avg:0.693597
Epoch:1 Step:2608 Training_loss:0.685593 Training_loss_avg:0.692756
Epoch:1 Step:2616 Training_loss:0.657099 Training_loss_avg:0.692428
Epoch:1 Step:2624 Training_loss:0.674908 Training_loss_avg:0.691622
Epoch:1 Step:2632 Training_loss:0.689587 Training_loss_avg:0.691582
Epoch:1 Step:2640 Training_loss:0.720168 Training_loss_avg:0.692284
Epoch:1 Step:2648 Training_loss:0.712293 Training_loss_avg:0.692823
Epoch:1 Step:2656 Training_loss:0.741317 Training_loss_avg:0.694488
Epoch:1 Step:2664 Training_loss:0.671093 Training_loss_avg:0.694488
Epoch:1 Step:2672 Training_loss:0.694070 Training_loss_avg:0.694916
Epoch:1 Step:2680 Training_loss:0.708987 Training_loss_avg:0.695879
Epoch:1 Step:2688 Training_loss:0.719167 Training_loss_avg:0.697255
Validating:


52it [00:07,  6.59it/s]


Epoch:1 Step:2688 Val_loss:0.698274
Epoch:1 Step:2696 Training_loss:0.725147 Training_loss_avg:0.699000
Epoch:1 Step:2704 Training_loss:0.690185 Training_loss_avg:0.698631
Epoch:1 Step:2712 Training_loss:0.676566 Training_loss_avg:0.697834
Epoch:1 Step:2720 Training_loss:0.675453 Training_loss_avg:0.697440
Epoch:1 Step:2728 Training_loss:0.713673 Training_loss_avg:0.699028
Epoch:1 Step:2736 Training_loss:0.684507 Training_loss_avg:0.697800
Epoch:1 Step:2744 Training_loss:0.700693 Training_loss_avg:0.698454
Epoch:1 Step:2752 Training_loss:0.681474 Training_loss_avg:0.697178
Epoch:1 Step:2760 Training_loss:0.697282 Training_loss_avg:0.697148
Epoch:1 Step:2768 Training_loss:0.646688 Training_loss_avg:0.697894
Epoch:1 Step:2776 Training_loss:0.676692 Training_loss_avg:0.697793
Epoch:1 Step:2784 Training_loss:0.722253 Training_loss_avg:0.698121
Validating:


52it [00:07,  6.60it/s]


Epoch:1 Step:2784 Val_loss:0.689635
Epoch:1 Step:2792 Training_loss:0.672517 Training_loss_avg:0.695039
Epoch:1 Step:2800 Training_loss:0.711978 Training_loss_avg:0.695082
Epoch:1 Step:2808 Training_loss:0.676740 Training_loss_avg:0.695604
Epoch:1 Step:2816 Training_loss:0.703633 Training_loss_avg:0.696057
Epoch:1 Step:2824 Training_loss:0.699719 Training_loss_avg:0.696951
Epoch:1 Step:2832 Training_loss:0.658861 Training_loss_avg:0.697669
Epoch:1 Step:2840 Training_loss:0.631904 Training_loss_avg:0.696467
Epoch:1 Step:2848 Training_loss:0.639797 Training_loss_avg:0.695845
Epoch:1 Step:2856 Training_loss:0.701989 Training_loss_avg:0.694332
Epoch:1 Step:2864 Training_loss:0.699970 Training_loss_avg:0.693647
Epoch:1 Step:2872 Training_loss:0.720369 Training_loss_avg:0.693734
Epoch:1 Step:2880 Training_loss:0.657117 Training_loss_avg:0.693077
Validating:


52it [00:07,  6.59it/s]


Epoch:1 Step:2880 Val_loss:0.680228
Epoch:1 Step:2888 Training_loss:0.709807 Training_loss_avg:0.694076
Epoch:1 Step:2896 Training_loss:0.698177 Training_loss_avg:0.694502
Epoch:1 Step:2904 Training_loss:0.612583 Training_loss_avg:0.692598
Epoch:1 Step:2912 Training_loss:0.782446 Training_loss_avg:0.692410
Epoch:1 Step:2920 Training_loss:0.691398 Training_loss_avg:0.691760
Epoch:1 Step:2928 Training_loss:0.645122 Training_loss_avg:0.690766
Epoch:1 Step:2936 Training_loss:0.645403 Training_loss_avg:0.689175
Epoch:1 Step:2944 Training_loss:0.565657 Training_loss_avg:0.686882
Epoch:1 Step:2952 Training_loss:0.629065 Training_loss_avg:0.685172
Epoch:1 Step:2960 Training_loss:0.692391 Training_loss_avg:0.684434
Epoch:1 Step:2968 Training_loss:0.678411 Training_loss_avg:0.684282
Epoch:1 Step:2976 Training_loss:0.610316 Training_loss_avg:0.683466
Validating:


52it [00:07,  6.59it/s]


Epoch:1 Step:2976 Val_loss:0.681324
Epoch:1 Step:2984 Training_loss:0.652236 Training_loss_avg:0.682670
Epoch:1 Step:2992 Training_loss:0.706524 Training_loss_avg:0.682871
Epoch:1 Step:3000 Training_loss:0.678267 Training_loss_avg:0.682746
Epoch:1 Step:3008 Training_loss:0.679784 Training_loss_avg:0.682630
Epoch:1 Step:3016 Training_loss:0.735341 Training_loss_avg:0.684195
Epoch:1 Step:3024 Training_loss:0.590049 Training_loss_avg:0.682497
Epoch:1 Step:3032 Training_loss:0.783466 Training_loss_avg:0.684375
Epoch:1 Step:3040 Training_loss:0.737887 Training_loss_avg:0.684729
Epoch:1 Step:3048 Training_loss:0.604289 Training_loss_avg:0.682569
Epoch:1 Step:3056 Training_loss:0.889480 Training_loss_avg:0.685533
Epoch:1 Step:3064 Training_loss:0.720179 Training_loss_avg:0.686514
Epoch:1 Step:3072 Training_loss:0.714013 Training_loss_avg:0.686913
Validating:


52it [00:07,  6.60it/s]


Epoch:1 Step:3072 Val_loss:0.681716
Epoch:1 Step:3080 Training_loss:0.790145 Training_loss_avg:0.688536
Epoch:1 Step:3088 Training_loss:0.624655 Training_loss_avg:0.686646
Epoch:1 Step:3096 Training_loss:0.602311 Training_loss_avg:0.684189
Epoch:1 Step:3104 Training_loss:0.658909 Training_loss_avg:0.683564
Epoch:1 Step:3112 Training_loss:0.723310 Training_loss_avg:0.684499
Epoch:1 Step:3120 Training_loss:0.645882 Training_loss_avg:0.683907
Epoch:1 Step:3128 Training_loss:0.676463 Training_loss_avg:0.683163
Epoch:1 Step:3136 Training_loss:0.670696 Training_loss_avg:0.682887
Epoch:1 Step:3144 Training_loss:0.627859 Training_loss_avg:0.681430
Epoch:1 Step:3152 Training_loss:0.863110 Training_loss_avg:0.685063
Epoch:1 Step:3160 Training_loss:0.649876 Training_loss_avg:0.684115
Epoch:1 Step:3168 Training_loss:0.666685 Training_loss_avg:0.684515
Validating:


52it [00:07,  6.59it/s]


Epoch:1 Step:3168 Val_loss:0.681175
Epoch:1 Step:3176 Training_loss:0.631990 Training_loss_avg:0.683621
Epoch:1 Step:3184 Training_loss:0.717390 Training_loss_avg:0.683523
Epoch:1 Step:3192 Training_loss:0.634785 Training_loss_avg:0.682769
Epoch:1 Step:3200 Training_loss:0.680179 Training_loss_avg:0.682133
Epoch:1 Step:3208 Training_loss:0.663018 Training_loss_avg:0.681858
Epoch:1 Step:3216 Training_loss:0.717250 Training_loss_avg:0.682131
Epoch:1 Step:3224 Training_loss:0.599136 Training_loss_avg:0.680119
Epoch:1 Step:3232 Training_loss:0.698778 Training_loss_avg:0.680917
Epoch:1 Step:3240 Training_loss:0.701420 Training_loss_avg:0.682308
Epoch:1 Step:3248 Training_loss:0.767630 Training_loss_avg:0.684864
Epoch:1 Step:3256 Training_loss:0.709821 Training_loss_avg:0.685021
Epoch:1 Step:3264 Training_loss:0.686978 Training_loss_avg:0.684761
Validating:


52it [00:07,  6.59it/s]


Epoch:1 Step:3264 Val_loss:0.681721
Epoch:1 Step:3272 Training_loss:0.650905 Training_loss_avg:0.683372
Epoch:1 Step:3280 Training_loss:0.699949 Training_loss_avg:0.684228
Epoch:1 Step:3288 Training_loss:0.669177 Training_loss_avg:0.683416
Epoch:1 Step:3296 Training_loss:0.731285 Training_loss_avg:0.684078
Epoch:1 Step:3304 Training_loss:0.716218 Training_loss_avg:0.686151
Epoch:1 Step:3312 Training_loss:0.718161 Training_loss_avg:0.684865
Epoch:1 Step:3320 Training_loss:0.627155 Training_loss_avg:0.683580
Epoch:1 Step:3328 Training_loss:0.644569 Training_loss_avg:0.683569
Epoch:2 Step:0 Training_loss:0.698238 Training_loss_avg:0.684626
Validating:


52it [00:07,  6.59it/s]


Epoch:2 Step:0 Val_loss:0.682117
Epoch:2 Step:8 Training_loss:0.710249 Training_loss_avg:0.687518
Epoch:2 Step:16 Training_loss:0.729461 Training_loss_avg:0.689526
Epoch:2 Step:24 Training_loss:0.663453 Training_loss_avg:0.688947
Epoch:2 Step:32 Training_loss:0.730985 Training_loss_avg:0.689998
Epoch:2 Step:40 Training_loss:0.752309 Training_loss_avg:0.692838
Epoch:2 Step:48 Training_loss:0.668913 Training_loss_avg:0.693172
Epoch:2 Step:56 Training_loss:0.706222 Training_loss_avg:0.693166
Epoch:2 Step:64 Training_loss:0.668667 Training_loss_avg:0.692974
Epoch:2 Step:72 Training_loss:0.647571 Training_loss_avg:0.692329
Epoch:2 Step:80 Training_loss:0.687856 Training_loss_avg:0.691380
Epoch:2 Step:88 Training_loss:0.660242 Training_loss_avg:0.692783
Epoch:2 Step:96 Training_loss:0.683392 Training_loss_avg:0.690782
Validating:


52it [00:07,  6.58it/s]


Epoch:2 Step:96 Val_loss:0.683524
Epoch:2 Step:104 Training_loss:0.657048 Training_loss_avg:0.689165
Epoch:2 Step:112 Training_loss:0.737064 Training_loss_avg:0.691821
Epoch:2 Step:120 Training_loss:0.692127 Training_loss_avg:0.687874
Epoch:2 Step:128 Training_loss:0.653953 Training_loss_avg:0.686549
Epoch:2 Step:136 Training_loss:0.640811 Training_loss_avg:0.685085
Epoch:2 Step:144 Training_loss:0.628752 Training_loss_avg:0.681857
Epoch:2 Step:152 Training_loss:0.723024 Training_loss_avg:0.683825
Epoch:2 Step:160 Training_loss:0.671276 Training_loss_avg:0.685204
Epoch:2 Step:168 Training_loss:0.595718 Training_loss_avg:0.683940
Epoch:2 Step:176 Training_loss:0.595385 Training_loss_avg:0.681382
Epoch:2 Step:184 Training_loss:0.640392 Training_loss_avg:0.681272
Epoch:2 Step:192 Training_loss:0.713488 Training_loss_avg:0.682012
Validating:


52it [00:07,  6.59it/s]


Epoch:2 Step:192 Val_loss:0.681313
Epoch:2 Step:200 Training_loss:0.546430 Training_loss_avg:0.679527
Epoch:2 Step:208 Training_loss:0.730452 Training_loss_avg:0.681579
Epoch:2 Step:216 Training_loss:0.691246 Training_loss_avg:0.678142
Epoch:2 Step:224 Training_loss:0.678392 Training_loss_avg:0.678712
Epoch:2 Step:232 Training_loss:0.759297 Training_loss_avg:0.680564
Epoch:2 Step:240 Training_loss:0.752473 Training_loss_avg:0.682974
Epoch:2 Step:248 Training_loss:0.625121 Training_loss_avg:0.681128
Epoch:2 Step:256 Training_loss:0.680085 Training_loss_avg:0.682034
Epoch:2 Step:264 Training_loss:0.613008 Training_loss_avg:0.680691
Epoch:2 Step:272 Training_loss:0.730167 Training_loss_avg:0.682034
Epoch:2 Step:280 Training_loss:0.717051 Training_loss_avg:0.682030
Epoch:2 Step:288 Training_loss:0.645379 Training_loss_avg:0.682955
Validating:


52it [00:07,  6.59it/s]


Epoch:2 Step:288 Val_loss:0.681019
Epoch:2 Step:296 Training_loss:0.741646 Training_loss_avg:0.683812
Epoch:2 Step:304 Training_loss:0.605004 Training_loss_avg:0.681884
Epoch:2 Step:312 Training_loss:0.687745 Training_loss_avg:0.680286
Epoch:2 Step:320 Training_loss:0.671218 Training_loss_avg:0.679514
Epoch:2 Step:328 Training_loss:0.793996 Training_loss_avg:0.681655
Epoch:2 Step:336 Training_loss:0.662424 Training_loss_avg:0.681885
Epoch:2 Step:344 Training_loss:0.609180 Training_loss_avg:0.680070
Epoch:2 Step:352 Training_loss:0.631379 Training_loss_avg:0.679314
Epoch:2 Step:360 Training_loss:0.706777 Training_loss_avg:0.678823
Epoch:2 Step:368 Training_loss:0.586031 Training_loss_avg:0.676220
Epoch:2 Step:376 Training_loss:0.630731 Training_loss_avg:0.674471
Epoch:2 Step:384 Training_loss:0.730872 Training_loss_avg:0.676545
Validating:


52it [00:07,  6.59it/s]


Epoch:2 Step:384 Val_loss:0.680309
Epoch:2 Step:392 Training_loss:0.788412 Training_loss_avg:0.679422
Epoch:2 Step:400 Training_loss:0.579717 Training_loss_avg:0.677052
Epoch:2 Step:408 Training_loss:0.777721 Training_loss_avg:0.678401
Epoch:2 Step:416 Training_loss:0.647298 Training_loss_avg:0.676758
Epoch:2 Step:424 Training_loss:0.658427 Training_loss_avg:0.676657
Epoch:2 Step:432 Training_loss:0.654967 Training_loss_avg:0.675137
Epoch:2 Step:440 Training_loss:0.734924 Training_loss_avg:0.674789
Epoch:2 Step:448 Training_loss:0.715657 Training_loss_avg:0.675724
Epoch:2 Step:456 Training_loss:0.687616 Training_loss_avg:0.675352
Epoch:2 Step:464 Training_loss:0.686770 Training_loss_avg:0.675714
Epoch:2 Step:472 Training_loss:0.624887 Training_loss_avg:0.675261
Epoch:2 Step:480 Training_loss:0.622801 Training_loss_avg:0.673960
Validating:


52it [00:07,  6.59it/s]


Epoch:2 Step:480 Val_loss:0.684929
Epoch:2 Step:488 Training_loss:0.646005 Training_loss_avg:0.673675
Epoch:2 Step:496 Training_loss:0.765547 Training_loss_avg:0.675318
Epoch:2 Step:504 Training_loss:0.672722 Training_loss_avg:0.675631
Epoch:2 Step:512 Training_loss:0.729349 Training_loss_avg:0.675477
Epoch:2 Step:520 Training_loss:0.670484 Training_loss_avg:0.675044
Epoch:2 Step:528 Training_loss:0.700657 Training_loss_avg:0.675978
Epoch:2 Step:536 Training_loss:0.793148 Training_loss_avg:0.679025
Epoch:2 Step:544 Training_loss:0.656928 Training_loss_avg:0.679589
Epoch:2 Step:552 Training_loss:0.636012 Training_loss_avg:0.677848
Epoch:2 Step:560 Training_loss:0.670169 Training_loss_avg:0.677826
Epoch:2 Step:568 Training_loss:0.714517 Training_loss_avg:0.680202
Epoch:2 Step:576 Training_loss:0.653384 Training_loss_avg:0.681362
Validating:


52it [00:07,  6.60it/s]


Epoch:2 Step:576 Val_loss:0.687162
Epoch:2 Step:584 Training_loss:0.701756 Training_loss_avg:0.682589
Epoch:2 Step:592 Training_loss:0.658169 Training_loss_avg:0.681483
Epoch:2 Step:600 Training_loss:0.631148 Training_loss_avg:0.683177
Epoch:2 Step:608 Training_loss:0.665674 Training_loss_avg:0.681882
Epoch:2 Step:616 Training_loss:0.812509 Training_loss_avg:0.684307
Epoch:2 Step:624 Training_loss:0.804311 Training_loss_avg:0.686825
Epoch:2 Step:632 Training_loss:0.730787 Training_loss_avg:0.686255
Epoch:2 Step:640 Training_loss:0.737095 Training_loss_avg:0.685948
Epoch:2 Step:648 Training_loss:0.578108 Training_loss_avg:0.685007
Epoch:2 Step:656 Training_loss:0.659855 Training_loss_avg:0.684603
Epoch:2 Step:664 Training_loss:0.684497 Training_loss_avg:0.686033
Epoch:2 Step:672 Training_loss:0.659955 Training_loss_avg:0.684628
Validating:


52it [00:07,  6.60it/s]


Epoch:2 Step:672 Val_loss:0.680122
Epoch:2 Step:680 Training_loss:0.629095 Training_loss_avg:0.682869
Epoch:2 Step:688 Training_loss:0.646557 Training_loss_avg:0.682893
Epoch:2 Step:696 Training_loss:0.737168 Training_loss_avg:0.682803
Epoch:2 Step:704 Training_loss:0.677830 Training_loss_avg:0.684260
Epoch:2 Step:712 Training_loss:0.656322 Training_loss_avg:0.683631
Epoch:2 Step:720 Training_loss:0.775382 Training_loss_avg:0.685715
Epoch:2 Step:728 Training_loss:0.696408 Training_loss_avg:0.683763
Epoch:2 Step:736 Training_loss:0.674068 Training_loss_avg:0.683996
Epoch:2 Step:744 Training_loss:0.746422 Training_loss_avg:0.686741
Epoch:2 Step:752 Training_loss:0.693103 Training_loss_avg:0.687975
Epoch:2 Step:760 Training_loss:0.705516 Training_loss_avg:0.687950
Epoch:2 Step:768 Training_loss:0.604933 Training_loss_avg:0.688328
Validating:


52it [00:07,  6.60it/s]


Epoch:2 Step:768 Val_loss:0.678877
Epoch:2 Step:776 Training_loss:0.731090 Training_loss_avg:0.690335
Epoch:2 Step:784 Training_loss:0.637126 Training_loss_avg:0.688460
Epoch:2 Step:792 Training_loss:0.677540 Training_loss_avg:0.686243
Epoch:2 Step:800 Training_loss:0.686611 Training_loss_avg:0.688381
Epoch:2 Step:808 Training_loss:0.602857 Training_loss_avg:0.684883
Epoch:2 Step:816 Training_loss:0.685464 Training_loss_avg:0.685647
Epoch:2 Step:824 Training_loss:0.675285 Training_loss_avg:0.685984
Epoch:2 Step:832 Training_loss:0.761357 Training_loss_avg:0.688112
Epoch:2 Step:840 Training_loss:0.637686 Training_loss_avg:0.686167
Epoch:2 Step:848 Training_loss:0.587823 Training_loss_avg:0.683610
Epoch:2 Step:856 Training_loss:0.648851 Training_loss_avg:0.682835
Epoch:2 Step:864 Training_loss:0.585826 Training_loss_avg:0.680816
Validating:


52it [00:07,  6.60it/s]


Epoch:2 Step:864 Val_loss:0.678996
Epoch:2 Step:872 Training_loss:0.695303 Training_loss_avg:0.682224
Epoch:2 Step:880 Training_loss:0.715955 Training_loss_avg:0.684087
Epoch:2 Step:888 Training_loss:0.823733 Training_loss_avg:0.687642
Epoch:2 Step:896 Training_loss:0.734653 Training_loss_avg:0.687024
Epoch:2 Step:904 Training_loss:0.700756 Training_loss_avg:0.687585
Epoch:2 Step:912 Training_loss:0.678512 Training_loss_avg:0.686568
Epoch:2 Step:920 Training_loss:0.767150 Training_loss_avg:0.688501
Epoch:2 Step:928 Training_loss:0.697746 Training_loss_avg:0.688443
Epoch:2 Step:936 Training_loss:0.676465 Training_loss_avg:0.686109
Epoch:2 Step:944 Training_loss:0.653643 Training_loss_avg:0.686044
Epoch:2 Step:952 Training_loss:0.586188 Training_loss_avg:0.685047
Epoch:2 Step:960 Training_loss:0.558046 Training_loss_avg:0.682805
Validating:


52it [00:07,  6.59it/s]


Epoch:2 Step:960 Val_loss:0.677706
Epoch:2 Step:968 Training_loss:0.656902 Training_loss_avg:0.681652
Epoch:2 Step:976 Training_loss:0.662666 Training_loss_avg:0.681838
Epoch:2 Step:984 Training_loss:0.582575 Training_loss_avg:0.679454
Epoch:2 Step:992 Training_loss:0.626839 Training_loss_avg:0.678828
Epoch:2 Step:1000 Training_loss:0.721742 Training_loss_avg:0.680640
Epoch:2 Step:1008 Training_loss:0.611163 Training_loss_avg:0.679550
Epoch:2 Step:1016 Training_loss:0.782813 Training_loss_avg:0.678956
Epoch:2 Step:1024 Training_loss:0.852549 Training_loss_avg:0.679920
Epoch:2 Step:1032 Training_loss:0.693150 Training_loss_avg:0.679168
Epoch:2 Step:1040 Training_loss:0.723112 Training_loss_avg:0.678888
Epoch:2 Step:1048 Training_loss:0.568516 Training_loss_avg:0.678696
Epoch:2 Step:1056 Training_loss:0.654914 Training_loss_avg:0.678597
Validating:


52it [00:07,  6.59it/s]


Epoch:2 Step:1056 Val_loss:0.682278
Epoch:2 Step:1064 Training_loss:0.702507 Training_loss_avg:0.678958
Epoch:2 Step:1072 Training_loss:0.654366 Training_loss_avg:0.678846
Epoch:2 Step:1080 Training_loss:0.612585 Training_loss_avg:0.678516
Epoch:2 Step:1088 Training_loss:0.538423 Training_loss_avg:0.676353
Epoch:2 Step:1096 Training_loss:0.633456 Training_loss_avg:0.674279
Epoch:2 Step:1104 Training_loss:0.804654 Training_loss_avg:0.676815
Epoch:2 Step:1112 Training_loss:0.680680 Training_loss_avg:0.677302
Epoch:2 Step:1120 Training_loss:0.786147 Training_loss_avg:0.677518
Epoch:2 Step:1128 Training_loss:0.771841 Training_loss_avg:0.679026
Epoch:2 Step:1136 Training_loss:0.705797 Training_loss_avg:0.679661
Epoch:2 Step:1144 Training_loss:0.629036 Training_loss_avg:0.677313
Epoch:2 Step:1152 Training_loss:0.578561 Training_loss_avg:0.675022
Validating:


52it [00:07,  6.60it/s]


Epoch:2 Step:1152 Val_loss:0.682644
Epoch:2 Step:1160 Training_loss:0.864372 Training_loss_avg:0.678199
Epoch:2 Step:1168 Training_loss:0.636976 Training_loss_avg:0.678840
Epoch:2 Step:1176 Training_loss:0.744325 Training_loss_avg:0.679105
Epoch:2 Step:1184 Training_loss:0.685234 Training_loss_avg:0.680067
Epoch:2 Step:1192 Training_loss:0.764838 Training_loss_avg:0.681813
Epoch:2 Step:1200 Training_loss:0.657806 Training_loss_avg:0.681237
Epoch:2 Step:1208 Training_loss:0.664226 Training_loss_avg:0.682464
Epoch:2 Step:1216 Training_loss:0.713415 Training_loss_avg:0.683023
Epoch:2 Step:1224 Training_loss:0.688439 Training_loss_avg:0.683286
Epoch:2 Step:1232 Training_loss:0.764168 Training_loss_avg:0.683343
Epoch:2 Step:1240 Training_loss:0.756369 Training_loss_avg:0.685716
Epoch:2 Step:1248 Training_loss:0.693069 Training_loss_avg:0.687821
Validating:


52it [00:07,  6.60it/s]


Epoch:2 Step:1248 Val_loss:0.682689
Epoch:2 Step:1256 Training_loss:0.673120 Training_loss_avg:0.688307
Epoch:2 Step:1264 Training_loss:0.692483 Training_loss_avg:0.690440
Epoch:2 Step:1272 Training_loss:0.767872 Training_loss_avg:0.691891
Epoch:2 Step:1280 Training_loss:0.724999 Training_loss_avg:0.692072
Epoch:2 Step:1288 Training_loss:0.699975 Training_loss_avg:0.689597
Epoch:2 Step:1296 Training_loss:0.707332 Training_loss_avg:0.689050
Epoch:2 Step:1304 Training_loss:0.696050 Training_loss_avg:0.688956
Epoch:2 Step:1312 Training_loss:0.695217 Training_loss_avg:0.689290
Epoch:2 Step:1320 Training_loss:0.673542 Training_loss_avg:0.687418
Epoch:2 Step:1328 Training_loss:0.655708 Training_loss_avg:0.686577
Epoch:2 Step:1336 Training_loss:0.614466 Training_loss_avg:0.685337
Epoch:2 Step:1344 Training_loss:0.699972 Training_loss_avg:0.686264
Validating:


52it [00:07,  6.60it/s]


Epoch:2 Step:1344 Val_loss:0.722770
Epoch:2 Step:1352 Training_loss:0.655836 Training_loss_avg:0.687657
Epoch:2 Step:1360 Training_loss:0.643693 Training_loss_avg:0.689370
Epoch:2 Step:1368 Training_loss:0.573284 Training_loss_avg:0.687698
Epoch:2 Step:1376 Training_loss:0.710053 Training_loss_avg:0.688645
Epoch:2 Step:1384 Training_loss:0.612089 Training_loss_avg:0.689236
Epoch:2 Step:1392 Training_loss:0.864813 Training_loss_avg:0.693995
Epoch:2 Step:1400 Training_loss:0.616670 Training_loss_avg:0.691894
Epoch:2 Step:1408 Training_loss:0.932018 Training_loss_avg:0.698311
Epoch:2 Step:1416 Training_loss:0.704778 Training_loss_avg:0.696750
Epoch:2 Step:1424 Training_loss:0.715607 Training_loss_avg:0.694011
Epoch:2 Step:1432 Training_loss:0.679623 Training_loss_avg:0.693741
Epoch:2 Step:1440 Training_loss:0.675703 Training_loss_avg:0.692792
Validating:


52it [00:07,  6.59it/s]


Epoch:2 Step:1440 Val_loss:0.689906
Epoch:2 Step:1448 Training_loss:0.692755 Training_loss_avg:0.695277
Epoch:2 Step:1456 Training_loss:0.700983 Training_loss_avg:0.696199
Epoch:2 Step:1464 Training_loss:0.637417 Training_loss_avg:0.694897
Epoch:2 Step:1472 Training_loss:0.696040 Training_loss_avg:0.695730
Epoch:2 Step:1480 Training_loss:0.698482 Training_loss_avg:0.697448
Epoch:2 Step:1488 Training_loss:0.694578 Training_loss_avg:0.700571
Epoch:2 Step:1496 Training_loss:0.632367 Training_loss_avg:0.700550
Epoch:2 Step:1504 Training_loss:0.644408 Training_loss_avg:0.697345
Epoch:2 Step:1512 Training_loss:0.715093 Training_loss_avg:0.698033
Epoch:2 Step:1520 Training_loss:0.696376 Training_loss_avg:0.696237
Epoch:2 Step:1528 Training_loss:0.772559 Training_loss_avg:0.696252
Epoch:2 Step:1536 Training_loss:0.814017 Training_loss_avg:0.698416
Validating:


52it [00:07,  6.59it/s]


Epoch:2 Step:1536 Val_loss:0.679355
Epoch:2 Step:1544 Training_loss:0.695231 Training_loss_avg:0.699740
Epoch:2 Step:1552 Training_loss:0.605058 Training_loss_avg:0.700270
Epoch:2 Step:1560 Training_loss:0.726104 Training_loss_avg:0.697505
Epoch:2 Step:1568 Training_loss:0.627858 Training_loss_avg:0.697322
Epoch:2 Step:1576 Training_loss:0.691130 Training_loss_avg:0.696258
Epoch:2 Step:1584 Training_loss:0.623344 Training_loss_avg:0.695021
Epoch:2 Step:1592 Training_loss:0.671224 Training_loss_avg:0.693148
Epoch:2 Step:1600 Training_loss:0.753881 Training_loss_avg:0.695070
Epoch:2 Step:1608 Training_loss:0.698364 Training_loss_avg:0.695753
Epoch:2 Step:1616 Training_loss:0.653301 Training_loss_avg:0.694550
Epoch:2 Step:1624 Training_loss:0.695859 Training_loss_avg:0.694699
Epoch:2 Step:1632 Training_loss:0.672531 Training_loss_avg:0.692866
Validating:


52it [00:07,  6.59it/s]


Epoch:2 Step:1632 Val_loss:0.680251
Epoch:2 Step:1640 Training_loss:0.738663 Training_loss_avg:0.692512
Epoch:2 Step:1648 Training_loss:0.724732 Training_loss_avg:0.693145
Epoch:2 Step:1656 Training_loss:0.677731 Training_loss_avg:0.693237
Epoch:2 Step:1664 Training_loss:0.680455 Training_loss_avg:0.692997
Epoch:2 Step:1672 Training_loss:0.675775 Training_loss_avg:0.691155
Epoch:2 Step:1680 Training_loss:0.672150 Training_loss_avg:0.690098
Epoch:2 Step:1688 Training_loss:0.666187 Training_loss_avg:0.689422
Epoch:2 Step:1696 Training_loss:0.689263 Training_loss_avg:0.689061
Epoch:2 Step:1704 Training_loss:0.738707 Training_loss_avg:0.689914
Epoch:2 Step:1712 Training_loss:0.697312 Training_loss_avg:0.689956
Epoch:2 Step:1720 Training_loss:0.711764 Training_loss_avg:0.690720
Epoch:2 Step:1728 Training_loss:0.641191 Training_loss_avg:0.690430
Validating:


52it [00:07,  6.60it/s]


Epoch:2 Step:1728 Val_loss:0.680843
Epoch:2 Step:1736 Training_loss:0.670499 Training_loss_avg:0.691551
Epoch:2 Step:1744 Training_loss:0.778689 Training_loss_avg:0.693125
Epoch:2 Step:1752 Training_loss:0.672409 Training_loss_avg:0.693456
Epoch:2 Step:1760 Training_loss:0.794052 Training_loss_avg:0.696464
Epoch:2 Step:1768 Training_loss:0.686687 Training_loss_avg:0.698732
Epoch:2 Step:1776 Training_loss:0.662749 Training_loss_avg:0.697786
Epoch:2 Step:1784 Training_loss:0.730022 Training_loss_avg:0.700144
Epoch:2 Step:1792 Training_loss:0.712737 Training_loss_avg:0.697103
Epoch:2 Step:1800 Training_loss:0.739857 Training_loss_avg:0.699566
Epoch:2 Step:1808 Training_loss:0.696642 Training_loss_avg:0.694859
Epoch:2 Step:1816 Training_loss:0.698401 Training_loss_avg:0.694731
Epoch:2 Step:1824 Training_loss:0.704847 Training_loss_avg:0.694516
Validating:


52it [00:07,  6.60it/s]


Epoch:2 Step:1824 Val_loss:0.690971
Epoch:2 Step:1832 Training_loss:0.689391 Training_loss_avg:0.694711
Epoch:2 Step:1840 Training_loss:0.675725 Training_loss_avg:0.694712
Epoch:2 Step:1848 Training_loss:0.661823 Training_loss_avg:0.694093
Epoch:2 Step:1856 Training_loss:0.703536 Training_loss_avg:0.694144
Epoch:2 Step:1864 Training_loss:0.682572 Training_loss_avg:0.695047
Epoch:2 Step:1872 Training_loss:0.706297 Training_loss_avg:0.695253
Epoch:2 Step:1880 Training_loss:0.689800 Training_loss_avg:0.695079
Epoch:2 Step:1888 Training_loss:0.698006 Training_loss_avg:0.695147
Epoch:2 Step:1896 Training_loss:0.730093 Training_loss_avg:0.697102
Epoch:2 Step:1904 Training_loss:0.683431 Training_loss_avg:0.697882
Epoch:2 Step:1912 Training_loss:0.690723 Training_loss_avg:0.697395
Epoch:2 Step:1920 Training_loss:0.685979 Training_loss_avg:0.697187
Validating:


52it [00:07,  6.60it/s]


Epoch:2 Step:1920 Val_loss:0.687461
Epoch:2 Step:1928 Training_loss:0.647450 Training_loss_avg:0.694685
Epoch:2 Step:1936 Training_loss:0.687704 Training_loss_avg:0.692159
Epoch:2 Step:1944 Training_loss:0.677483 Training_loss_avg:0.691804
Epoch:2 Step:1952 Training_loss:0.692213 Training_loss_avg:0.693547
Epoch:2 Step:1960 Training_loss:0.688709 Training_loss_avg:0.692799
Epoch:2 Step:1968 Training_loss:0.738636 Training_loss_avg:0.695015
Epoch:2 Step:1976 Training_loss:0.687566 Training_loss_avg:0.694943
Epoch:2 Step:1984 Training_loss:0.724098 Training_loss_avg:0.696958
Epoch:2 Step:1992 Training_loss:0.708628 Training_loss_avg:0.697706
Epoch:2 Step:2000 Training_loss:0.683326 Training_loss_avg:0.696295
Epoch:2 Step:2008 Training_loss:0.680557 Training_loss_avg:0.695939
Epoch:2 Step:2016 Training_loss:0.686107 Training_loss_avg:0.696595
Validating:


52it [00:07,  6.59it/s]


Epoch:2 Step:2016 Val_loss:0.684370
Epoch:2 Step:2024 Training_loss:0.664275 Training_loss_avg:0.695964
Epoch:2 Step:2032 Training_loss:0.651701 Training_loss_avg:0.695547
Epoch:2 Step:2040 Training_loss:0.722059 Training_loss_avg:0.695215
Epoch:2 Step:2048 Training_loss:0.697969 Training_loss_avg:0.694680
Epoch:2 Step:2056 Training_loss:0.684468 Training_loss_avg:0.694814
Epoch:2 Step:2064 Training_loss:0.729111 Training_loss_avg:0.695788
Epoch:2 Step:2072 Training_loss:0.665639 Training_loss_avg:0.695585
Epoch:2 Step:2080 Training_loss:0.695734 Training_loss_avg:0.696056
Epoch:2 Step:2088 Training_loss:0.681810 Training_loss_avg:0.696369
Epoch:2 Step:2096 Training_loss:0.690844 Training_loss_avg:0.696401
Epoch:2 Step:2104 Training_loss:0.698832 Training_loss_avg:0.695603
Epoch:2 Step:2112 Training_loss:0.719371 Training_loss_avg:0.696044
Validating:


52it [00:07,  6.60it/s]


Epoch:2 Step:2112 Val_loss:0.687029
Epoch:2 Step:2120 Training_loss:0.679578 Training_loss_avg:0.695401
Epoch:2 Step:2128 Training_loss:0.646501 Training_loss_avg:0.695507
Epoch:2 Step:2136 Training_loss:0.697847 Training_loss_avg:0.696054
Epoch:2 Step:2144 Training_loss:0.682196 Training_loss_avg:0.694124
Epoch:2 Step:2152 Training_loss:0.682930 Training_loss_avg:0.694334
Epoch:2 Step:2160 Training_loss:0.702661 Training_loss_avg:0.692506
Epoch:2 Step:2168 Training_loss:0.699899 Training_loss_avg:0.692771
Epoch:2 Step:2176 Training_loss:0.669164 Training_loss_avg:0.692899
Epoch:2 Step:2184 Training_loss:0.682878 Training_loss_avg:0.691956
Epoch:2 Step:2192 Training_loss:0.685287 Training_loss_avg:0.691407
Epoch:2 Step:2200 Training_loss:0.701006 Training_loss_avg:0.690630
Epoch:2 Step:2208 Training_loss:0.692192 Training_loss_avg:0.690541
Validating:


52it [00:07,  6.59it/s]


Epoch:2 Step:2208 Val_loss:0.686908
Epoch:2 Step:2216 Training_loss:0.721366 Training_loss_avg:0.691000
Epoch:2 Step:2224 Training_loss:0.712398 Training_loss_avg:0.691151
Epoch:2 Step:2232 Training_loss:0.685066 Training_loss_avg:0.691065
Epoch:2 Step:2240 Training_loss:0.681755 Training_loss_avg:0.691185
Epoch:2 Step:2248 Training_loss:0.696881 Training_loss_avg:0.691887
Epoch:2 Step:2256 Training_loss:0.728874 Training_loss_avg:0.692393
Epoch:2 Step:2264 Training_loss:0.724151 Training_loss_avg:0.693225
Epoch:2 Step:2272 Training_loss:0.714191 Training_loss_avg:0.693383
Epoch:2 Step:2280 Training_loss:0.671039 Training_loss_avg:0.693008
Epoch:2 Step:2288 Training_loss:0.760373 Training_loss_avg:0.694255
Epoch:2 Step:2296 Training_loss:0.653378 Training_loss_avg:0.692721
Epoch:2 Step:2304 Training_loss:0.707138 Training_loss_avg:0.693195
Validating:


52it [00:07,  6.59it/s]


Epoch:2 Step:2304 Val_loss:0.681589
Epoch:2 Step:2312 Training_loss:0.665264 Training_loss_avg:0.692686
Epoch:2 Step:2320 Training_loss:0.660897 Training_loss_avg:0.692184
Epoch:2 Step:2328 Training_loss:0.713942 Training_loss_avg:0.693514
Epoch:2 Step:2336 Training_loss:0.735772 Training_loss_avg:0.694475
Epoch:2 Step:2344 Training_loss:0.697574 Training_loss_avg:0.694877
Epoch:2 Step:2352 Training_loss:0.640633 Training_loss_avg:0.693845
Epoch:2 Step:2360 Training_loss:0.622635 Training_loss_avg:0.692524
Epoch:2 Step:2368 Training_loss:0.668429 Training_loss_avg:0.691120
Epoch:2 Step:2376 Training_loss:0.657602 Training_loss_avg:0.690521
Epoch:2 Step:2384 Training_loss:0.706154 Training_loss_avg:0.690162
Epoch:2 Step:2392 Training_loss:0.619027 Training_loss_avg:0.688370
Epoch:2 Step:2400 Training_loss:0.717119 Training_loss_avg:0.689045
Validating:


52it [00:07,  6.60it/s]


Epoch:2 Step:2400 Val_loss:0.680531
Epoch:2 Step:2408 Training_loss:0.682944 Training_loss_avg:0.689093
Epoch:2 Step:2416 Training_loss:0.659024 Training_loss_avg:0.688552
Epoch:2 Step:2424 Training_loss:0.793728 Training_loss_avg:0.691141
Epoch:2 Step:2432 Training_loss:0.694150 Training_loss_avg:0.691990
Epoch:2 Step:2440 Training_loss:0.756732 Training_loss_avg:0.692683
Epoch:2 Step:2448 Training_loss:0.649810 Training_loss_avg:0.691720
Epoch:2 Step:2456 Training_loss:0.684746 Training_loss_avg:0.691725
Epoch:2 Step:2464 Training_loss:0.611985 Training_loss_avg:0.689383
Epoch:2 Step:2472 Training_loss:0.727965 Training_loss_avg:0.690629
Epoch:2 Step:2480 Training_loss:0.587398 Training_loss_avg:0.688463
Epoch:2 Step:2488 Training_loss:0.694847 Training_loss_avg:0.688723
Epoch:2 Step:2496 Training_loss:0.645377 Training_loss_avg:0.687814
Validating:


52it [00:07,  6.60it/s]


Epoch:2 Step:2496 Val_loss:0.680704
Epoch:2 Step:2504 Training_loss:0.721487 Training_loss_avg:0.688267
Epoch:2 Step:2512 Training_loss:0.626224 Training_loss_avg:0.686404
Epoch:2 Step:2520 Training_loss:0.711017 Training_loss_avg:0.687033
Epoch:2 Step:2528 Training_loss:0.665755 Training_loss_avg:0.687418
Epoch:2 Step:2536 Training_loss:0.723816 Training_loss_avg:0.687937
Epoch:2 Step:2544 Training_loss:0.715761 Training_loss_avg:0.688609
Epoch:2 Step:2552 Training_loss:0.603553 Training_loss_avg:0.687021
Epoch:2 Step:2560 Training_loss:0.639090 Training_loss_avg:0.685750
Epoch:2 Step:2568 Training_loss:0.769282 Training_loss_avg:0.687137
Epoch:2 Step:2576 Training_loss:0.625238 Training_loss_avg:0.686259
Epoch:2 Step:2584 Training_loss:0.709008 Training_loss_avg:0.686782
Epoch:2 Step:2592 Training_loss:0.619847 Training_loss_avg:0.685473
Validating:


52it [00:07,  6.59it/s]


Epoch:2 Step:2592 Val_loss:0.681358
Epoch:2 Step:2600 Training_loss:0.650510 Training_loss_avg:0.684463
Epoch:2 Step:2608 Training_loss:0.532835 Training_loss_avg:0.681276
Epoch:2 Step:2616 Training_loss:0.754325 Training_loss_avg:0.681935
Epoch:2 Step:2624 Training_loss:0.792558 Training_loss_avg:0.683538
Epoch:2 Step:2632 Training_loss:0.614460 Training_loss_avg:0.682126
Epoch:2 Step:2640 Training_loss:0.718734 Training_loss_avg:0.682866
Epoch:2 Step:2648 Training_loss:0.626791 Training_loss_avg:0.681464
Epoch:2 Step:2656 Training_loss:0.665654 Training_loss_avg:0.680199
Epoch:2 Step:2664 Training_loss:0.671614 Training_loss_avg:0.679149
Epoch:2 Step:2672 Training_loss:0.732298 Training_loss_avg:0.679511
Epoch:2 Step:2680 Training_loss:0.667683 Training_loss_avg:0.679444
Epoch:2 Step:2688 Training_loss:0.668369 Training_loss_avg:0.677604
Validating:


52it [00:07,  6.60it/s]


Epoch:2 Step:2688 Val_loss:0.681984
Epoch:2 Step:2696 Training_loss:0.556112 Training_loss_avg:0.675658
Epoch:2 Step:2704 Training_loss:0.726353 Training_loss_avg:0.676043
Epoch:2 Step:2712 Training_loss:0.688033 Training_loss_avg:0.676498
Epoch:2 Step:2720 Training_loss:0.646789 Training_loss_avg:0.676216
Epoch:2 Step:2728 Training_loss:0.705023 Training_loss_avg:0.676037
Epoch:2 Step:2736 Training_loss:0.595517 Training_loss_avg:0.673232
Epoch:2 Step:2744 Training_loss:0.758152 Training_loss_avg:0.674444
Epoch:2 Step:2752 Training_loss:0.674352 Training_loss_avg:0.675118
Epoch:2 Step:2760 Training_loss:0.646227 Training_loss_avg:0.675590
Epoch:2 Step:2768 Training_loss:0.582110 Training_loss_avg:0.673864
Epoch:2 Step:2776 Training_loss:0.948380 Training_loss_avg:0.679679
Epoch:2 Step:2784 Training_loss:0.762086 Training_loss_avg:0.680798
Validating:


52it [00:07,  6.60it/s]


Epoch:2 Step:2784 Val_loss:0.683232
Epoch:2 Step:2792 Training_loss:0.718571 Training_loss_avg:0.682789
Epoch:2 Step:2800 Training_loss:0.704505 Training_loss_avg:0.682537
Epoch:2 Step:2808 Training_loss:0.636421 Training_loss_avg:0.681606
Epoch:2 Step:2816 Training_loss:0.781618 Training_loss_avg:0.684058
Epoch:2 Step:2824 Training_loss:0.773825 Training_loss_avg:0.683660
Epoch:2 Step:2832 Training_loss:0.668628 Training_loss_avg:0.683149
Epoch:2 Step:2840 Training_loss:0.697438 Training_loss_avg:0.681964
Epoch:2 Step:2848 Training_loss:0.738999 Training_loss_avg:0.683747
Epoch:2 Step:2856 Training_loss:0.614075 Training_loss_avg:0.682334
Epoch:2 Step:2864 Training_loss:0.618724 Training_loss_avg:0.682469
Epoch:2 Step:2872 Training_loss:0.706890 Training_loss_avg:0.682047
Epoch:2 Step:2880 Training_loss:0.662292 Training_loss_avg:0.683545
Validating:


52it [00:07,  6.59it/s]


Epoch:2 Step:2880 Val_loss:0.681623
Epoch:2 Step:2888 Training_loss:0.699660 Training_loss_avg:0.683641
Epoch:2 Step:2896 Training_loss:0.673224 Training_loss_avg:0.684198
Epoch:2 Step:2904 Training_loss:0.690176 Training_loss_avg:0.683572
Epoch:2 Step:2912 Training_loss:0.736158 Training_loss_avg:0.685771
Epoch:2 Step:2920 Training_loss:0.785094 Training_loss_avg:0.687252
Epoch:2 Step:2928 Training_loss:0.724919 Training_loss_avg:0.688436
Epoch:2 Step:2936 Training_loss:0.681616 Training_loss_avg:0.687592
Epoch:2 Step:2944 Training_loss:0.694268 Training_loss_avg:0.687162
Epoch:2 Step:2952 Training_loss:0.655740 Training_loss_avg:0.688205
Epoch:2 Step:2960 Training_loss:0.694489 Training_loss_avg:0.689313
Epoch:2 Step:2968 Training_loss:0.692279 Training_loss_avg:0.687773
Epoch:2 Step:2976 Training_loss:0.667155 Training_loss_avg:0.688612
Validating:


52it [00:07,  6.59it/s]


Epoch:2 Step:2976 Val_loss:0.683529
Epoch:2 Step:2984 Training_loss:0.704728 Training_loss_avg:0.688526
Epoch:2 Step:2992 Training_loss:0.703472 Training_loss_avg:0.690199
Epoch:2 Step:3000 Training_loss:0.712887 Training_loss_avg:0.691446
Epoch:2 Step:3008 Training_loss:0.700945 Training_loss_avg:0.694808
Epoch:2 Step:3016 Training_loss:0.696951 Training_loss_avg:0.693661
Epoch:2 Step:3024 Training_loss:0.690629 Training_loss_avg:0.691622
Epoch:2 Step:3032 Training_loss:0.710441 Training_loss_avg:0.693542
Epoch:2 Step:3040 Training_loss:0.680382 Training_loss_avg:0.692775
Epoch:2 Step:3048 Training_loss:0.646256 Training_loss_avg:0.693164
Epoch:2 Step:3056 Training_loss:0.669732 Training_loss_avg:0.693246
Epoch:2 Step:3064 Training_loss:0.731066 Training_loss_avg:0.694435
Epoch:2 Step:3072 Training_loss:0.677392 Training_loss_avg:0.693337
Validating:


52it [00:07,  6.59it/s]


Epoch:2 Step:3072 Val_loss:0.686522
Epoch:2 Step:3080 Training_loss:0.693203 Training_loss_avg:0.693847
Epoch:2 Step:3088 Training_loss:0.691654 Training_loss_avg:0.694313
Epoch:2 Step:3096 Training_loss:0.665915 Training_loss_avg:0.696509
Epoch:2 Step:3104 Training_loss:0.721779 Training_loss_avg:0.696417
Epoch:2 Step:3112 Training_loss:0.745516 Training_loss_avg:0.697567
Epoch:2 Step:3120 Training_loss:0.677087 Training_loss_avg:0.698173
Epoch:2 Step:3128 Training_loss:0.698270 Training_loss_avg:0.698038
Epoch:2 Step:3136 Training_loss:0.716896 Training_loss_avg:0.700465
Epoch:2 Step:3144 Training_loss:0.731806 Training_loss_avg:0.699939
Epoch:2 Step:3152 Training_loss:0.686972 Training_loss_avg:0.700191
Epoch:2 Step:3160 Training_loss:0.643503 Training_loss_avg:0.700136
Epoch:2 Step:3168 Training_loss:0.645668 Training_loss_avg:0.701408
Validating:


52it [00:07,  6.60it/s]


Epoch:2 Step:3168 Val_loss:0.683095
Epoch:2 Step:3176 Training_loss:0.698196 Training_loss_avg:0.696404
Epoch:2 Step:3184 Training_loss:0.713344 Training_loss_avg:0.695429
Epoch:2 Step:3192 Training_loss:0.732080 Training_loss_avg:0.695699
Epoch:2 Step:3200 Training_loss:0.673744 Training_loss_avg:0.695084
Epoch:2 Step:3208 Training_loss:0.739345 Training_loss_avg:0.697143
Epoch:2 Step:3216 Training_loss:0.733239 Training_loss_avg:0.696175
Epoch:2 Step:3224 Training_loss:0.681248 Training_loss_avg:0.694323
Epoch:2 Step:3232 Training_loss:0.759713 Training_loss_avg:0.696145
Epoch:2 Step:3240 Training_loss:0.710156 Training_loss_avg:0.696399
Epoch:2 Step:3248 Training_loss:0.690421 Training_loss_avg:0.695428
Epoch:2 Step:3256 Training_loss:0.733133 Training_loss_avg:0.697809
Epoch:2 Step:3264 Training_loss:0.688751 Training_loss_avg:0.699210
Validating:


52it [00:07,  6.59it/s]


Epoch:2 Step:3264 Val_loss:0.688297
Epoch:2 Step:3272 Training_loss:0.698150 Training_loss_avg:0.699035
Epoch:2 Step:3280 Training_loss:0.680014 Training_loss_avg:0.699389
Epoch:2 Step:3288 Training_loss:0.721939 Training_loss_avg:0.699835
Epoch:2 Step:3296 Training_loss:0.708455 Training_loss_avg:0.700539
Epoch:2 Step:3304 Training_loss:0.686652 Training_loss_avg:0.700469
Epoch:2 Step:3312 Training_loss:0.672602 Training_loss_avg:0.699198
Epoch:2 Step:3320 Training_loss:0.693473 Training_loss_avg:0.697365
Epoch:2 Step:3328 Training_loss:0.702495 Training_loss_avg:0.696917
Epoch:3 Step:0 Training_loss:0.711985 Training_loss_avg:0.697524
Validating:


52it [00:07,  6.60it/s]


Epoch:3 Step:0 Val_loss:0.692527
Epoch:3 Step:8 Training_loss:0.675332 Training_loss_avg:0.697146
Epoch:3 Step:16 Training_loss:0.692067 Training_loss_avg:0.697872
Epoch:3 Step:24 Training_loss:0.708252 Training_loss_avg:0.698147
Epoch:3 Step:32 Training_loss:0.663229 Training_loss_avg:0.697566
Epoch:3 Step:40 Training_loss:0.724170 Training_loss_avg:0.698707
Epoch:3 Step:48 Training_loss:0.680825 Training_loss_avg:0.698229
Epoch:3 Step:56 Training_loss:0.671626 Training_loss_avg:0.697592
Epoch:3 Step:64 Training_loss:0.682198 Training_loss_avg:0.696978
Epoch:3 Step:72 Training_loss:0.711091 Training_loss_avg:0.697181
Epoch:3 Step:80 Training_loss:0.685036 Training_loss_avg:0.696943
Epoch:3 Step:88 Training_loss:0.675588 Training_loss_avg:0.696642
Epoch:3 Step:96 Training_loss:0.684670 Training_loss_avg:0.696126
Validating:


52it [00:07,  6.59it/s]


Epoch:3 Step:96 Val_loss:0.686699
Epoch:3 Step:104 Training_loss:0.691675 Training_loss_avg:0.696352
Epoch:3 Step:112 Training_loss:0.693821 Training_loss_avg:0.697303
Epoch:3 Step:120 Training_loss:0.652651 Training_loss_avg:0.696962
Epoch:3 Step:128 Training_loss:0.685196 Training_loss_avg:0.696044
Epoch:3 Step:136 Training_loss:0.721576 Training_loss_avg:0.696928
Epoch:3 Step:144 Training_loss:0.717792 Training_loss_avg:0.697420
Epoch:3 Step:152 Training_loss:0.743233 Training_loss_avg:0.698452
Epoch:3 Step:160 Training_loss:0.666293 Training_loss_avg:0.698459
Epoch:3 Step:168 Training_loss:0.670561 Training_loss_avg:0.697435
Epoch:3 Step:176 Training_loss:0.717378 Training_loss_avg:0.696872
Epoch:3 Step:184 Training_loss:0.705654 Training_loss_avg:0.697443
Epoch:3 Step:192 Training_loss:0.721614 Training_loss_avg:0.697910
Validating:


52it [00:07,  6.59it/s]


Epoch:3 Step:192 Val_loss:0.687328
Epoch:3 Step:200 Training_loss:0.698155 Training_loss_avg:0.697535
Epoch:3 Step:208 Training_loss:0.694989 Training_loss_avg:0.696799
Epoch:3 Step:216 Training_loss:0.659938 Training_loss_avg:0.696258
Epoch:3 Step:224 Training_loss:0.656891 Training_loss_avg:0.696526
Epoch:3 Step:232 Training_loss:0.727071 Training_loss_avg:0.698154
Epoch:3 Step:240 Training_loss:0.681233 Training_loss_avg:0.697815
Epoch:3 Step:248 Training_loss:0.646062 Training_loss_avg:0.696469
Epoch:3 Step:256 Training_loss:0.679266 Training_loss_avg:0.695413
Epoch:3 Step:264 Training_loss:0.660248 Training_loss_avg:0.695143
Epoch:3 Step:272 Training_loss:0.659449 Training_loss_avg:0.693545
Epoch:3 Step:280 Training_loss:0.683091 Training_loss_avg:0.692542
Epoch:3 Step:288 Training_loss:0.697097 Training_loss_avg:0.692859
Validating:


52it [00:07,  6.59it/s]


Epoch:3 Step:288 Val_loss:0.684313
Epoch:3 Step:296 Training_loss:0.656034 Training_loss_avg:0.690786
Epoch:3 Step:304 Training_loss:0.631373 Training_loss_avg:0.689210
Epoch:3 Step:312 Training_loss:0.692549 Training_loss_avg:0.689253
Epoch:3 Step:320 Training_loss:0.685128 Training_loss_avg:0.688292
Epoch:3 Step:328 Training_loss:0.689055 Training_loss_avg:0.688298
Epoch:3 Step:336 Training_loss:0.698370 Training_loss_avg:0.688303
Epoch:3 Step:344 Training_loss:0.710019 Training_loss_avg:0.688903
Epoch:3 Step:352 Training_loss:0.686995 Training_loss_avg:0.688204
Epoch:3 Step:360 Training_loss:0.624911 Training_loss_avg:0.686533
Epoch:3 Step:368 Training_loss:0.707760 Training_loss_avg:0.686955
Epoch:3 Step:376 Training_loss:0.746543 Training_loss_avg:0.688434
Epoch:3 Step:384 Training_loss:0.637767 Training_loss_avg:0.687320
Validating:


52it [00:07,  6.60it/s]


Epoch:3 Step:384 Val_loss:0.681002
Epoch:3 Step:392 Training_loss:0.573016 Training_loss_avg:0.684730
Epoch:3 Step:400 Training_loss:0.601020 Training_loss_avg:0.682511
Epoch:3 Step:408 Training_loss:0.661853 Training_loss_avg:0.682242
Epoch:3 Step:416 Training_loss:0.491857 Training_loss_avg:0.678237
Epoch:3 Step:424 Training_loss:0.681798 Training_loss_avg:0.677708
Epoch:3 Step:432 Training_loss:0.659541 Training_loss_avg:0.677635
Epoch:3 Step:440 Training_loss:0.678580 Training_loss_avg:0.676723
Epoch:3 Step:448 Training_loss:0.737930 Training_loss_avg:0.677865
Epoch:3 Step:456 Training_loss:0.507555 Training_loss_avg:0.674583
Epoch:3 Step:464 Training_loss:0.591658 Training_loss_avg:0.672773
Epoch:3 Step:472 Training_loss:0.577240 Training_loss_avg:0.670096
Epoch:3 Step:480 Training_loss:0.769206 Training_loss_avg:0.671779
Validating:


52it [00:07,  6.60it/s]


Epoch:3 Step:480 Val_loss:0.691320
Epoch:3 Step:488 Training_loss:0.662786 Training_loss_avg:0.671523
Epoch:3 Step:496 Training_loss:0.823251 Training_loss_avg:0.674295
Epoch:3 Step:504 Training_loss:0.817009 Training_loss_avg:0.676801
Epoch:3 Step:512 Training_loss:0.593153 Training_loss_avg:0.674788
Epoch:3 Step:520 Training_loss:0.636671 Training_loss_avg:0.674468
Epoch:3 Step:528 Training_loss:0.585844 Training_loss_avg:0.672481
Epoch:3 Step:536 Training_loss:0.508600 Training_loss_avg:0.668222
Epoch:3 Step:544 Training_loss:0.754360 Training_loss_avg:0.668953
Epoch:3 Step:552 Training_loss:0.758395 Training_loss_avg:0.669256
Epoch:3 Step:560 Training_loss:0.816013 Training_loss_avg:0.672251
Epoch:3 Step:568 Training_loss:0.744432 Training_loss_avg:0.673728
Epoch:3 Step:576 Training_loss:0.889708 Training_loss_avg:0.677175
Validating:


52it [00:07,  6.60it/s]


Epoch:3 Step:576 Val_loss:0.691315
Epoch:3 Step:584 Training_loss:0.844101 Training_loss_avg:0.679944
Epoch:3 Step:592 Training_loss:0.513271 Training_loss_avg:0.675777
Epoch:3 Step:600 Training_loss:0.732666 Training_loss_avg:0.676467
Epoch:3 Step:608 Training_loss:0.734330 Training_loss_avg:0.677254
Epoch:3 Step:616 Training_loss:0.716714 Training_loss_avg:0.678389
Epoch:3 Step:624 Training_loss:0.723775 Training_loss_avg:0.679727
Epoch:3 Step:632 Training_loss:0.799724 Training_loss_avg:0.681180
Epoch:3 Step:640 Training_loss:0.737873 Training_loss_avg:0.682313
Epoch:3 Step:648 Training_loss:0.763255 Training_loss_avg:0.684657
Epoch:3 Step:656 Training_loss:0.711134 Training_loss_avg:0.685294
Epoch:3 Step:664 Training_loss:0.711418 Training_loss_avg:0.686318
Epoch:3 Step:672 Training_loss:0.643559 Training_loss_avg:0.686000
Validating:


52it [00:07,  6.59it/s]


Epoch:3 Step:672 Val_loss:0.682562
Epoch:3 Step:680 Training_loss:0.627243 Training_loss_avg:0.684883
Epoch:3 Step:688 Training_loss:0.698563 Training_loss_avg:0.684912
Epoch:3 Step:696 Training_loss:0.705487 Training_loss_avg:0.685901
Epoch:3 Step:704 Training_loss:0.757915 Training_loss_avg:0.688432
Epoch:3 Step:712 Training_loss:0.716328 Training_loss_avg:0.688908
Epoch:3 Step:720 Training_loss:0.709992 Training_loss_avg:0.689405
Epoch:3 Step:728 Training_loss:0.701693 Training_loss_avg:0.689658
Epoch:3 Step:736 Training_loss:0.715322 Training_loss_avg:0.689997
Epoch:3 Step:744 Training_loss:0.701926 Training_loss_avg:0.689835
Epoch:3 Step:752 Training_loss:0.715247 Training_loss_avg:0.690400
Epoch:3 Step:760 Training_loss:0.700860 Training_loss_avg:0.691919
Epoch:3 Step:768 Training_loss:0.700257 Training_loss_avg:0.691769
Validating:


52it [00:07,  6.60it/s]


Epoch:3 Step:768 Val_loss:0.689348
Epoch:3 Step:776 Training_loss:0.700334 Training_loss_avg:0.690845
Epoch:3 Step:784 Training_loss:0.706009 Training_loss_avg:0.692209
Epoch:3 Step:792 Training_loss:0.714325 Training_loss_avg:0.695036
Epoch:3 Step:800 Training_loss:0.688943 Training_loss_avg:0.696794
Epoch:3 Step:808 Training_loss:0.694121 Training_loss_avg:0.697439
Epoch:3 Step:816 Training_loss:0.647792 Training_loss_avg:0.700558
Epoch:3 Step:824 Training_loss:0.689440 Training_loss_avg:0.700711
Epoch:3 Step:832 Training_loss:0.688712 Training_loss_avg:0.701294
Epoch:3 Step:840 Training_loss:0.691119 Training_loss_avg:0.701545
Epoch:3 Step:848 Training_loss:0.689786 Training_loss_avg:0.700582
Epoch:3 Step:856 Training_loss:0.686379 Training_loss_avg:0.704159
Epoch:3 Step:864 Training_loss:0.685776 Training_loss_avg:0.706041
Validating:


52it [00:07,  6.60it/s]


Epoch:3 Step:864 Val_loss:0.688809
Epoch:3 Step:872 Training_loss:0.666504 Training_loss_avg:0.707826
Epoch:3 Step:880 Training_loss:0.684362 Training_loss_avg:0.706130
Epoch:3 Step:888 Training_loss:0.701805 Training_loss_avg:0.706910
Epoch:3 Step:896 Training_loss:0.710735 Training_loss_avg:0.704660
Epoch:3 Step:904 Training_loss:0.711390 Training_loss_avg:0.702547
Epoch:3 Step:912 Training_loss:0.681313 Training_loss_avg:0.704310
Epoch:3 Step:920 Training_loss:0.670304 Training_loss_avg:0.704983
Epoch:3 Step:928 Training_loss:0.730183 Training_loss_avg:0.707870
Epoch:3 Step:936 Training_loss:0.649607 Training_loss_avg:0.710690
Epoch:3 Step:944 Training_loss:0.689942 Training_loss_avg:0.709402
Epoch:3 Step:952 Training_loss:0.698112 Training_loss_avg:0.708196
Epoch:3 Step:960 Training_loss:0.666759 Training_loss_avg:0.705211
Validating:


52it [00:07,  6.60it/s]


Epoch:3 Step:960 Val_loss:0.684038
Epoch:3 Step:968 Training_loss:0.677977 Training_loss_avg:0.703882
Epoch:3 Step:976 Training_loss:0.702690 Training_loss_avg:0.700141
Epoch:3 Step:984 Training_loss:0.698347 Training_loss_avg:0.697226
Epoch:3 Step:992 Training_loss:0.684935 Training_loss_avg:0.700660
Epoch:3 Step:1000 Training_loss:0.694477 Training_loss_avg:0.699896
Epoch:3 Step:1008 Training_loss:0.690013 Training_loss_avg:0.699010
Epoch:3 Step:1016 Training_loss:0.660337 Training_loss_avg:0.697882
Epoch:3 Step:1024 Training_loss:0.705261 Training_loss_avg:0.697512
Epoch:3 Step:1032 Training_loss:0.750176 Training_loss_avg:0.696521
Epoch:3 Step:1040 Training_loss:0.718703 Training_loss_avg:0.696137
Epoch:3 Step:1048 Training_loss:0.663137 Training_loss_avg:0.694135
Epoch:3 Step:1056 Training_loss:0.767824 Training_loss_avg:0.695269
Validating:


52it [00:07,  6.60it/s]


Epoch:3 Step:1056 Val_loss:0.683098
Epoch:3 Step:1064 Training_loss:0.711125 Training_loss_avg:0.695263
Epoch:3 Step:1072 Training_loss:0.707229 Training_loss_avg:0.696536
Epoch:3 Step:1080 Training_loss:0.696561 Training_loss_avg:0.697923
Epoch:3 Step:1088 Training_loss:0.693595 Training_loss_avg:0.697823
Epoch:3 Step:1096 Training_loss:0.695894 Training_loss_avg:0.697631
Epoch:3 Step:1104 Training_loss:0.643929 Training_loss_avg:0.695352
Epoch:3 Step:1112 Training_loss:0.686977 Training_loss_avg:0.694765
Epoch:3 Step:1120 Training_loss:0.679082 Training_loss_avg:0.694146
Epoch:3 Step:1128 Training_loss:0.640532 Training_loss_avg:0.692923
Epoch:3 Step:1136 Training_loss:0.690169 Training_loss_avg:0.692420
Epoch:3 Step:1144 Training_loss:0.659382 Training_loss_avg:0.691569
Epoch:3 Step:1152 Training_loss:0.703356 Training_loss_avg:0.691331
Validating:


52it [00:07,  6.60it/s]


Epoch:3 Step:1152 Val_loss:0.692159
Epoch:3 Step:1160 Training_loss:0.668567 Training_loss_avg:0.690686
Epoch:3 Step:1168 Training_loss:0.680751 Training_loss_avg:0.690295
Epoch:3 Step:1176 Training_loss:0.679015 Training_loss_avg:0.689869
Epoch:3 Step:1184 Training_loss:0.669425 Training_loss_avg:0.689137
Epoch:3 Step:1192 Training_loss:0.676343 Training_loss_avg:0.688378
Epoch:3 Step:1200 Training_loss:0.691660 Training_loss_avg:0.688432
Epoch:3 Step:1208 Training_loss:0.719873 Training_loss_avg:0.688947
Epoch:3 Step:1216 Training_loss:0.713092 Training_loss_avg:0.690253
Epoch:3 Step:1224 Training_loss:0.683112 Training_loss_avg:0.690127
Epoch:3 Step:1232 Training_loss:0.720867 Training_loss_avg:0.690770
Epoch:3 Step:1240 Training_loss:0.669380 Training_loss_avg:0.690335
Epoch:3 Step:1248 Training_loss:0.705602 Training_loss_avg:0.690651
Validating:


52it [00:07,  6.59it/s]


Epoch:3 Step:1248 Val_loss:0.683371
Epoch:3 Step:1256 Training_loss:0.630903 Training_loss_avg:0.689542
Epoch:3 Step:1264 Training_loss:0.749894 Training_loss_avg:0.690824
Epoch:3 Step:1272 Training_loss:0.735068 Training_loss_avg:0.692195
Epoch:3 Step:1280 Training_loss:0.715724 Training_loss_avg:0.692823
Epoch:3 Step:1288 Training_loss:0.641798 Training_loss_avg:0.691622
Epoch:3 Step:1296 Training_loss:0.678997 Training_loss_avg:0.690988
Epoch:3 Step:1304 Training_loss:0.622362 Training_loss_avg:0.689207
Epoch:3 Step:1312 Training_loss:0.709324 Training_loss_avg:0.689767
Epoch:3 Step:1320 Training_loss:0.686704 Training_loss_avg:0.690095
Epoch:3 Step:1328 Training_loss:0.730334 Training_loss_avg:0.690098
Epoch:3 Step:1336 Training_loss:0.734099 Training_loss_avg:0.691788
Epoch:3 Step:1344 Training_loss:0.708449 Training_loss_avg:0.692158
Validating:


52it [00:07,  6.60it/s]


Epoch:3 Step:1344 Val_loss:0.681541
Epoch:3 Step:1352 Training_loss:0.753793 Training_loss_avg:0.693272
Epoch:3 Step:1360 Training_loss:0.699495 Training_loss_avg:0.693927
Epoch:3 Step:1368 Training_loss:0.749382 Training_loss_avg:0.695355
Epoch:3 Step:1376 Training_loss:0.683240 Training_loss_avg:0.694966
Epoch:3 Step:1384 Training_loss:0.645458 Training_loss_avg:0.693908
Epoch:3 Step:1392 Training_loss:0.673869 Training_loss_avg:0.693687
Epoch:3 Step:1400 Training_loss:0.728014 Training_loss_avg:0.694357
Epoch:3 Step:1408 Training_loss:0.679489 Training_loss_avg:0.694147
Epoch:3 Step:1416 Training_loss:0.666170 Training_loss_avg:0.694264
Epoch:3 Step:1424 Training_loss:0.695367 Training_loss_avg:0.694066
Epoch:3 Step:1432 Training_loss:0.697706 Training_loss_avg:0.693016
Epoch:3 Step:1440 Training_loss:0.645755 Training_loss_avg:0.691557
Validating:


52it [00:07,  6.58it/s]


Epoch:3 Step:1440 Val_loss:0.685674
Epoch:3 Step:1448 Training_loss:0.631984 Training_loss_avg:0.690934
Epoch:3 Step:1456 Training_loss:0.662738 Training_loss_avg:0.688833
Epoch:3 Step:1464 Training_loss:0.692930 Training_loss_avg:0.688469
Epoch:3 Step:1472 Training_loss:0.686942 Training_loss_avg:0.688063
Epoch:3 Step:1480 Training_loss:0.676966 Training_loss_avg:0.687671
Epoch:3 Step:1488 Training_loss:0.694979 Training_loss_avg:0.687699
Epoch:3 Step:1496 Training_loss:0.719622 Training_loss_avg:0.688173
Epoch:3 Step:1504 Training_loss:0.669894 Training_loss_avg:0.688693
Epoch:3 Step:1512 Training_loss:0.724210 Training_loss_avg:0.689437
Epoch:3 Step:1520 Training_loss:0.703698 Training_loss_avg:0.689930
Epoch:3 Step:1528 Training_loss:0.683666 Training_loss_avg:0.690792
Epoch:3 Step:1536 Training_loss:0.702262 Training_loss_avg:0.691034
Validating:


52it [00:07,  6.60it/s]


Epoch:3 Step:1536 Val_loss:0.682639
Epoch:3 Step:1544 Training_loss:0.663667 Training_loss_avg:0.691120
Epoch:3 Step:1552 Training_loss:0.697561 Training_loss_avg:0.691004
Epoch:3 Step:1560 Training_loss:0.722067 Training_loss_avg:0.692074
Epoch:3 Step:1568 Training_loss:0.683205 Training_loss_avg:0.692123
Epoch:3 Step:1576 Training_loss:0.696982 Training_loss_avg:0.692482
Epoch:3 Step:1584 Training_loss:0.645900 Training_loss_avg:0.692012
Epoch:3 Step:1592 Training_loss:0.705046 Training_loss_avg:0.692586
Epoch:3 Step:1600 Training_loss:0.680560 Training_loss_avg:0.692364
Epoch:3 Step:1608 Training_loss:0.738959 Training_loss_avg:0.692746
Epoch:3 Step:1616 Training_loss:0.645720 Training_loss_avg:0.691398
Epoch:3 Step:1624 Training_loss:0.720195 Training_loss_avg:0.692140
Epoch:3 Step:1632 Training_loss:0.690595 Training_loss_avg:0.691534
Validating:


52it [00:07,  6.59it/s]


Epoch:3 Step:1632 Val_loss:0.683766
Epoch:3 Step:1640 Training_loss:0.700792 Training_loss_avg:0.692163
Epoch:3 Step:1648 Training_loss:0.665328 Training_loss_avg:0.691357
Epoch:3 Step:1656 Training_loss:0.725190 Training_loss_avg:0.693243
Epoch:3 Step:1664 Training_loss:0.699861 Training_loss_avg:0.692242
Epoch:3 Step:1672 Training_loss:0.701401 Training_loss_avg:0.691569
Epoch:3 Step:1680 Training_loss:0.699773 Training_loss_avg:0.691250
Epoch:3 Step:1688 Training_loss:0.709130 Training_loss_avg:0.692597
Epoch:3 Step:1696 Training_loss:0.713368 Training_loss_avg:0.693284
Epoch:3 Step:1704 Training_loss:0.712076 Training_loss_avg:0.695078
Epoch:3 Step:1712 Training_loss:0.697880 Training_loss_avg:0.694849
Epoch:3 Step:1720 Training_loss:0.684695 Training_loss_avg:0.694809
Epoch:3 Step:1728 Training_loss:0.713724 Training_loss_avg:0.694477
Validating:


52it [00:07,  6.59it/s]


Epoch:3 Step:1728 Val_loss:0.695616
Epoch:3 Step:1736 Training_loss:0.682434 Training_loss_avg:0.693444
Epoch:3 Step:1744 Training_loss:0.686671 Training_loss_avg:0.693008
Epoch:3 Step:1752 Training_loss:0.694788 Training_loss_avg:0.691828
Epoch:3 Step:1760 Training_loss:0.649478 Training_loss_avg:0.690828
Epoch:3 Step:1768 Training_loss:0.653071 Training_loss_avg:0.688901
Epoch:3 Step:1776 Training_loss:0.757390 Training_loss_avg:0.690384
Epoch:3 Step:1784 Training_loss:0.753305 Training_loss_avg:0.692541
Epoch:3 Step:1792 Training_loss:0.641346 Training_loss_avg:0.691891
Epoch:3 Step:1800 Training_loss:0.704261 Training_loss_avg:0.691416
Epoch:3 Step:1808 Training_loss:0.674445 Training_loss_avg:0.691315
Epoch:3 Step:1816 Training_loss:0.724420 Training_loss_avg:0.692480
Epoch:3 Step:1824 Training_loss:0.745007 Training_loss_avg:0.693473
Validating:


52it [00:07,  6.60it/s]


Epoch:3 Step:1824 Val_loss:0.695093
Epoch:3 Step:1832 Training_loss:0.695878 Training_loss_avg:0.693436
Epoch:3 Step:1840 Training_loss:0.684089 Training_loss_avg:0.694203
Epoch:3 Step:1848 Training_loss:0.669699 Training_loss_avg:0.694957
Epoch:3 Step:1856 Training_loss:0.687270 Training_loss_avg:0.695448
Epoch:3 Step:1864 Training_loss:0.669233 Training_loss_avg:0.694974
Epoch:3 Step:1872 Training_loss:0.682467 Training_loss_avg:0.694884
Epoch:3 Step:1880 Training_loss:0.677036 Training_loss_avg:0.694886
Epoch:3 Step:1888 Training_loss:0.672112 Training_loss_avg:0.694428
Epoch:3 Step:1896 Training_loss:0.729020 Training_loss_avg:0.694616
Epoch:3 Step:1904 Training_loss:0.642065 Training_loss_avg:0.694060
Epoch:3 Step:1912 Training_loss:0.745916 Training_loss_avg:0.694494
Epoch:3 Step:1920 Training_loss:0.683144 Training_loss_avg:0.694083
Validating:


52it [00:07,  6.59it/s]


Epoch:3 Step:1920 Val_loss:0.681749
Epoch:3 Step:1928 Training_loss:0.622026 Training_loss_avg:0.692850
Epoch:3 Step:1936 Training_loss:0.726749 Training_loss_avg:0.693340
Epoch:3 Step:1944 Training_loss:0.774433 Training_loss_avg:0.695555
Epoch:3 Step:1952 Training_loss:0.732061 Training_loss_avg:0.696245
Epoch:3 Step:1960 Training_loss:0.657603 Training_loss_avg:0.694956
Epoch:3 Step:1968 Training_loss:0.813930 Training_loss_avg:0.697570
Epoch:3 Step:1976 Training_loss:0.603233 Training_loss_avg:0.695695
Epoch:3 Step:1984 Training_loss:0.685787 Training_loss_avg:0.696493
Epoch:3 Step:1992 Training_loss:0.682845 Training_loss_avg:0.696049
Epoch:3 Step:2000 Training_loss:0.683771 Training_loss_avg:0.696113
Epoch:3 Step:2008 Training_loss:0.686189 Training_loss_avg:0.695058
Epoch:3 Step:2016 Training_loss:0.694580 Training_loss_avg:0.696035
Validating:


52it [00:07,  6.59it/s]


Epoch:3 Step:2016 Val_loss:0.681759
Epoch:3 Step:2024 Training_loss:0.578775 Training_loss_avg:0.693207
Epoch:3 Step:2032 Training_loss:0.752690 Training_loss_avg:0.694449
Epoch:3 Step:2040 Training_loss:0.715281 Training_loss_avg:0.694738
Epoch:3 Step:2048 Training_loss:0.646040 Training_loss_avg:0.694353
Epoch:3 Step:2056 Training_loss:0.664190 Training_loss_avg:0.693133
Epoch:3 Step:2064 Training_loss:0.626209 Training_loss_avg:0.691660
Epoch:3 Step:2072 Training_loss:0.745038 Training_loss_avg:0.692532
Epoch:3 Step:2080 Training_loss:0.668785 Training_loss_avg:0.691913
Epoch:3 Step:2088 Training_loss:0.657739 Training_loss_avg:0.690885
Epoch:3 Step:2096 Training_loss:0.597959 Training_loss_avg:0.688577
Epoch:3 Step:2104 Training_loss:0.639309 Training_loss_avg:0.687121
Epoch:3 Step:2112 Training_loss:0.727904 Training_loss_avg:0.687722
Validating:


52it [00:07,  6.59it/s]


Epoch:3 Step:2112 Val_loss:0.681616
Epoch:3 Step:2120 Training_loss:0.672376 Training_loss_avg:0.687475
Epoch:3 Step:2128 Training_loss:0.672958 Training_loss_avg:0.686660
Epoch:3 Step:2136 Training_loss:0.704239 Training_loss_avg:0.687096
Epoch:3 Step:2144 Training_loss:0.820381 Training_loss_avg:0.689770
Epoch:3 Step:2152 Training_loss:0.737723 Training_loss_avg:0.690629
Epoch:3 Step:2160 Training_loss:0.746923 Training_loss_avg:0.692578
Epoch:3 Step:2168 Training_loss:0.695289 Training_loss_avg:0.693422
Epoch:3 Step:2176 Training_loss:0.879902 Training_loss_avg:0.695873
Epoch:3 Step:2184 Training_loss:0.721509 Training_loss_avg:0.695237
Epoch:3 Step:2192 Training_loss:0.586275 Training_loss_avg:0.694135
Epoch:3 Step:2200 Training_loss:0.707356 Training_loss_avg:0.694197
Epoch:3 Step:2208 Training_loss:0.692503 Training_loss_avg:0.694558
Validating:


52it [00:07,  6.59it/s]


Epoch:3 Step:2208 Val_loss:0.681240
Epoch:3 Step:2216 Training_loss:0.717193 Training_loss_avg:0.694414
Epoch:3 Step:2224 Training_loss:0.719959 Training_loss_avg:0.693913
Epoch:3 Step:2232 Training_loss:0.750275 Training_loss_avg:0.695001
Epoch:3 Step:2240 Training_loss:0.660952 Training_loss_avg:0.694538
Epoch:3 Step:2248 Training_loss:0.652736 Training_loss_avg:0.694199
Epoch:3 Step:2256 Training_loss:0.650274 Training_loss_avg:0.693459
Epoch:3 Step:2264 Training_loss:0.712348 Training_loss_avg:0.694321
Epoch:3 Step:2272 Training_loss:0.761758 Training_loss_avg:0.695907
Epoch:3 Step:2280 Training_loss:0.689729 Training_loss_avg:0.696161
Epoch:3 Step:2288 Training_loss:0.707465 Training_loss_avg:0.696868
Epoch:3 Step:2296 Training_loss:0.686179 Training_loss_avg:0.696011
Epoch:3 Step:2304 Training_loss:0.665931 Training_loss_avg:0.696488
Validating:


52it [00:07,  6.60it/s]


Epoch:3 Step:2304 Val_loss:0.685496
Epoch:3 Step:2312 Training_loss:0.670533 Training_loss_avg:0.694981
Epoch:3 Step:2320 Training_loss:0.699014 Training_loss_avg:0.695298
Epoch:3 Step:2328 Training_loss:0.665351 Training_loss_avg:0.696165
Epoch:3 Step:2336 Training_loss:0.723666 Training_loss_avg:0.696103
Epoch:3 Step:2344 Training_loss:0.647671 Training_loss_avg:0.693568
Epoch:3 Step:2352 Training_loss:0.641274 Training_loss_avg:0.691752
Epoch:3 Step:2360 Training_loss:0.664321 Training_loss_avg:0.691886
Epoch:3 Step:2368 Training_loss:0.670475 Training_loss_avg:0.689017
Epoch:3 Step:2376 Training_loss:0.721338 Training_loss_avg:0.691379
Epoch:3 Step:2384 Training_loss:0.630192 Training_loss_avg:0.690267
Epoch:3 Step:2392 Training_loss:0.616415 Training_loss_avg:0.688939
Epoch:3 Step:2400 Training_loss:0.648094 Training_loss_avg:0.688225
Validating:


52it [00:07,  6.59it/s]


Epoch:3 Step:2400 Val_loss:0.680930
Epoch:3 Step:2408 Training_loss:0.694206 Training_loss_avg:0.688386
Epoch:3 Step:2416 Training_loss:0.625126 Training_loss_avg:0.686997
Epoch:3 Step:2424 Training_loss:0.724563 Training_loss_avg:0.689912
Epoch:3 Step:2432 Training_loss:0.704689 Training_loss_avg:0.688952
Epoch:3 Step:2440 Training_loss:0.704319 Training_loss_avg:0.688733
Epoch:3 Step:2448 Training_loss:0.776549 Training_loss_avg:0.691343
Epoch:3 Step:2456 Training_loss:0.743574 Training_loss_avg:0.692931
Epoch:3 Step:2464 Training_loss:0.906742 Training_loss_avg:0.698542
Epoch:3 Step:2472 Training_loss:0.756110 Training_loss_avg:0.698763
Epoch:3 Step:2480 Training_loss:0.644844 Training_loss_avg:0.698284
Epoch:3 Step:2488 Training_loss:0.671670 Training_loss_avg:0.698563
Epoch:3 Step:2496 Training_loss:0.718742 Training_loss_avg:0.700978
Validating:


52it [00:07,  6.59it/s]


Epoch:3 Step:2496 Val_loss:0.680545
Epoch:3 Step:2504 Training_loss:0.628180 Training_loss_avg:0.700756
Epoch:3 Step:2512 Training_loss:0.676574 Training_loss_avg:0.699729
Epoch:3 Step:2520 Training_loss:0.607555 Training_loss_avg:0.698433
Epoch:3 Step:2528 Training_loss:0.740895 Training_loss_avg:0.699792
Epoch:3 Step:2536 Training_loss:0.690208 Training_loss_avg:0.699511
Epoch:3 Step:2544 Training_loss:0.695335 Training_loss_avg:0.697010
Epoch:3 Step:2552 Training_loss:0.630890 Training_loss_avg:0.694873
Epoch:3 Step:2560 Training_loss:0.641342 Training_loss_avg:0.692762
Epoch:3 Step:2568 Training_loss:0.644288 Training_loss_avg:0.691742
Epoch:3 Step:2576 Training_loss:0.747472 Training_loss_avg:0.689093
Epoch:3 Step:2584 Training_loss:0.712302 Training_loss_avg:0.688909
Epoch:3 Step:2592 Training_loss:0.766944 Training_loss_avg:0.692522
Validating:


52it [00:07,  6.60it/s]


Epoch:3 Step:2592 Val_loss:0.680731
Epoch:3 Step:2600 Training_loss:0.640178 Training_loss_avg:0.691179
Epoch:3 Step:2608 Training_loss:0.701355 Training_loss_avg:0.691356
Epoch:3 Step:2616 Training_loss:0.771552 Training_loss_avg:0.692443
Epoch:3 Step:2624 Training_loss:0.666715 Training_loss_avg:0.691378
Epoch:3 Step:2632 Training_loss:0.775984 Training_loss_avg:0.691892
Epoch:3 Step:2640 Training_loss:0.746997 Training_loss_avg:0.693613
Epoch:3 Step:2648 Training_loss:0.690993 Training_loss_avg:0.694378
Epoch:3 Step:2656 Training_loss:0.776476 Training_loss_avg:0.696903
Epoch:3 Step:2664 Training_loss:0.694250 Training_loss_avg:0.696541
Epoch:3 Step:2672 Training_loss:0.690894 Training_loss_avg:0.695123
Epoch:3 Step:2680 Training_loss:0.685185 Training_loss_avg:0.695032
Epoch:3 Step:2688 Training_loss:0.722617 Training_loss_avg:0.695335
Validating:


52it [00:07,  6.60it/s]


Epoch:3 Step:2688 Val_loss:0.683155
Epoch:3 Step:2696 Training_loss:0.674249 Training_loss_avg:0.695097
Epoch:3 Step:2704 Training_loss:0.742589 Training_loss_avg:0.696630
Epoch:3 Step:2712 Training_loss:0.688014 Training_loss_avg:0.696980
Epoch:3 Step:2720 Training_loss:0.681436 Training_loss_avg:0.696628
Epoch:3 Step:2728 Training_loss:0.683500 Training_loss_avg:0.696991
Epoch:3 Step:2736 Training_loss:0.717090 Training_loss_avg:0.696859
Epoch:3 Step:2744 Training_loss:0.688313 Training_loss_avg:0.697672
Epoch:3 Step:2752 Training_loss:0.696606 Training_loss_avg:0.698779
Epoch:3 Step:2760 Training_loss:0.685782 Training_loss_avg:0.699208
Epoch:3 Step:2768 Training_loss:0.656583 Training_loss_avg:0.698930
Epoch:3 Step:2776 Training_loss:0.705993 Training_loss_avg:0.698623
Epoch:3 Step:2784 Training_loss:0.693412 Training_loss_avg:0.699888
Validating:


52it [00:07,  6.59it/s]


Epoch:3 Step:2784 Val_loss:0.690697
Epoch:3 Step:2792 Training_loss:0.692382 Training_loss_avg:0.701407
Epoch:3 Step:2800 Training_loss:0.677210 Training_loss_avg:0.701989
Epoch:3 Step:2808 Training_loss:0.680331 Training_loss_avg:0.701712
Epoch:3 Step:2816 Training_loss:0.650981 Training_loss_avg:0.702229
Epoch:3 Step:2824 Training_loss:0.701373 Training_loss_avg:0.701765
Epoch:3 Step:2832 Training_loss:0.722443 Training_loss_avg:0.702120
Epoch:3 Step:2840 Training_loss:0.695901 Training_loss_avg:0.701952
Epoch:3 Step:2848 Training_loss:0.698475 Training_loss_avg:0.700391
Epoch:3 Step:2856 Training_loss:0.773294 Training_loss_avg:0.700985
Epoch:3 Step:2864 Training_loss:0.688058 Training_loss_avg:0.696611
Epoch:3 Step:2872 Training_loss:0.695451 Training_loss_avg:0.695398
Epoch:3 Step:2880 Training_loss:0.698976 Training_loss_avg:0.696481
Validating:


52it [00:07,  6.59it/s]


Epoch:3 Step:2880 Val_loss:0.694581
Epoch:3 Step:2888 Training_loss:0.722450 Training_loss_avg:0.697496
Epoch:3 Step:2896 Training_loss:0.725634 Training_loss_avg:0.697634
Epoch:3 Step:2904 Training_loss:0.720784 Training_loss_avg:0.699486
Epoch:3 Step:2912 Training_loss:0.693831 Training_loss_avg:0.699831
Epoch:3 Step:2920 Training_loss:0.702867 Training_loss_avg:0.701738
Epoch:3 Step:2928 Training_loss:0.695685 Training_loss_avg:0.700833
Epoch:3 Step:2936 Training_loss:0.690206 Training_loss_avg:0.700833
Epoch:3 Step:2944 Training_loss:0.697424 Training_loss_avg:0.700875
Epoch:3 Step:2952 Training_loss:0.692488 Training_loss_avg:0.702107
Epoch:3 Step:2960 Training_loss:0.721250 Training_loss_avg:0.703705
Epoch:3 Step:2968 Training_loss:0.642374 Training_loss_avg:0.703667
Epoch:3 Step:2976 Training_loss:0.713896 Training_loss_avg:0.702995
Validating:


52it [00:07,  6.60it/s]


Epoch:3 Step:2976 Val_loss:0.680880
Epoch:3 Step:2984 Training_loss:0.691496 Training_loss_avg:0.702579
Epoch:3 Step:2992 Training_loss:0.680987 Training_loss_avg:0.700860
Epoch:3 Step:3000 Training_loss:0.785095 Training_loss_avg:0.703759
Epoch:3 Step:3008 Training_loss:0.743419 Training_loss_avg:0.704600
Epoch:3 Step:3016 Training_loss:0.732108 Training_loss_avg:0.703811
Epoch:3 Step:3024 Training_loss:0.672343 Training_loss_avg:0.703923
Epoch:3 Step:3032 Training_loss:0.701616 Training_loss_avg:0.702436
Epoch:3 Step:3040 Training_loss:0.689199 Training_loss_avg:0.701280
Epoch:3 Step:3048 Training_loss:0.690921 Training_loss_avg:0.701279
Epoch:3 Step:3056 Training_loss:0.704661 Training_loss_avg:0.699842
Epoch:3 Step:3064 Training_loss:0.663309 Training_loss_avg:0.699224
Epoch:3 Step:3072 Training_loss:0.724999 Training_loss_avg:0.699906
Validating:


52it [00:07,  6.59it/s]


Epoch:3 Step:3072 Val_loss:0.681065
Epoch:3 Step:3080 Training_loss:0.747293 Training_loss_avg:0.701148
Epoch:3 Step:3088 Training_loss:0.672965 Training_loss_avg:0.700155
Epoch:3 Step:3096 Training_loss:0.697008 Training_loss_avg:0.700610
Epoch:3 Step:3104 Training_loss:0.734468 Training_loss_avg:0.700448
Epoch:3 Step:3112 Training_loss:0.685347 Training_loss_avg:0.700394
Epoch:3 Step:3120 Training_loss:0.696759 Training_loss_avg:0.700701
Epoch:3 Step:3128 Training_loss:0.680691 Training_loss_avg:0.700645
Epoch:3 Step:3136 Training_loss:0.654131 Training_loss_avg:0.699385
Epoch:3 Step:3144 Training_loss:0.673711 Training_loss_avg:0.699093
Epoch:3 Step:3152 Training_loss:0.659673 Training_loss_avg:0.698355
Epoch:3 Step:3160 Training_loss:0.626059 Training_loss_avg:0.697160
Epoch:3 Step:3168 Training_loss:0.668490 Training_loss_avg:0.697398
Validating:


52it [00:07,  6.59it/s]


Epoch:3 Step:3168 Val_loss:0.681254
Epoch:3 Step:3176 Training_loss:0.688020 Training_loss_avg:0.697039
Epoch:3 Step:3184 Training_loss:0.683678 Training_loss_avg:0.696844
Epoch:3 Step:3192 Training_loss:0.684610 Training_loss_avg:0.696689
Epoch:3 Step:3200 Training_loss:0.741685 Training_loss_avg:0.697978
Epoch:3 Step:3208 Training_loss:0.682061 Training_loss_avg:0.698013
Epoch:3 Step:3216 Training_loss:0.589315 Training_loss_avg:0.696780
Epoch:3 Step:3224 Training_loss:0.662410 Training_loss_avg:0.696000
Epoch:3 Step:3232 Training_loss:0.621751 Training_loss_avg:0.693986
Epoch:3 Step:3240 Training_loss:0.667729 Training_loss_avg:0.693423
Epoch:3 Step:3248 Training_loss:0.645006 Training_loss_avg:0.692354
Epoch:3 Step:3256 Training_loss:0.720681 Training_loss_avg:0.691301
Epoch:3 Step:3264 Training_loss:0.632433 Training_loss_avg:0.690189
Validating:


52it [00:07,  6.59it/s]


Epoch:3 Step:3264 Val_loss:0.681310
Epoch:3 Step:3272 Training_loss:0.740154 Training_loss_avg:0.691083
Epoch:3 Step:3280 Training_loss:0.681593 Training_loss_avg:0.690735
Epoch:3 Step:3288 Training_loss:0.548448 Training_loss_avg:0.687255
Epoch:3 Step:3296 Training_loss:0.780734 Training_loss_avg:0.688357
Epoch:3 Step:3304 Training_loss:0.722317 Training_loss_avg:0.688388
Epoch:3 Step:3312 Training_loss:0.671680 Training_loss_avg:0.687945
Epoch:3 Step:3320 Training_loss:0.683083 Training_loss_avg:0.687549
Epoch:3 Step:3328 Training_loss:0.821264 Training_loss_avg:0.690061
Epoch:4 Step:0 Training_loss:0.829039 Training_loss_avg:0.692837
Validating:


52it [00:07,  6.59it/s]


Epoch:4 Step:0 Val_loss:0.682903
Epoch:4 Step:8 Training_loss:0.715281 Training_loss_avg:0.693195
Epoch:4 Step:16 Training_loss:0.735468 Training_loss_avg:0.694054
Epoch:4 Step:24 Training_loss:0.653715 Training_loss_avg:0.692703
Epoch:4 Step:32 Training_loss:0.533450 Training_loss_avg:0.690525
Epoch:4 Step:40 Training_loss:0.737373 Training_loss_avg:0.690995
Epoch:4 Step:48 Training_loss:0.680448 Training_loss_avg:0.690774
Epoch:4 Step:56 Training_loss:0.729789 Training_loss_avg:0.691750
Epoch:4 Step:64 Training_loss:0.713113 Training_loss_avg:0.690310
Epoch:4 Step:72 Training_loss:0.625767 Training_loss_avg:0.687957
Epoch:4 Step:80 Training_loss:0.558522 Training_loss_avg:0.684485
Epoch:4 Step:88 Training_loss:0.667415 Training_loss_avg:0.684387
Epoch:4 Step:96 Training_loss:0.638301 Training_loss_avg:0.683120
Validating:


52it [00:07,  6.58it/s]


Epoch:4 Step:96 Val_loss:0.681655
Epoch:4 Step:104 Training_loss:0.687804 Training_loss_avg:0.683092
Epoch:4 Step:112 Training_loss:0.709394 Training_loss_avg:0.683462
Epoch:4 Step:120 Training_loss:0.747995 Training_loss_avg:0.684329
Epoch:4 Step:128 Training_loss:0.692289 Training_loss_avg:0.684908
Epoch:4 Step:136 Training_loss:0.632408 Training_loss_avg:0.683056
Epoch:4 Step:144 Training_loss:0.695718 Training_loss_avg:0.682025
Epoch:4 Step:152 Training_loss:0.615142 Training_loss_avg:0.680868
Epoch:4 Step:160 Training_loss:0.715064 Training_loss_avg:0.681230
Epoch:4 Step:168 Training_loss:0.708583 Training_loss_avg:0.680712
Epoch:4 Step:176 Training_loss:0.714203 Training_loss_avg:0.681289
Epoch:4 Step:184 Training_loss:0.655612 Training_loss_avg:0.680466
Epoch:4 Step:192 Training_loss:0.744337 Training_loss_avg:0.681739
Validating:


52it [00:07,  6.59it/s]


Epoch:4 Step:192 Val_loss:0.681470
Epoch:4 Step:200 Training_loss:0.695053 Training_loss_avg:0.682557
Epoch:4 Step:208 Training_loss:0.723921 Training_loss_avg:0.683562
Epoch:4 Step:216 Training_loss:0.645373 Training_loss_avg:0.683276
Epoch:4 Step:224 Training_loss:0.627521 Training_loss_avg:0.683305
Epoch:4 Step:232 Training_loss:0.682587 Training_loss_avg:0.683587
Epoch:4 Step:240 Training_loss:0.658083 Training_loss_avg:0.682988
Epoch:4 Step:248 Training_loss:0.685553 Training_loss_avg:0.683026
Epoch:4 Step:256 Training_loss:0.739104 Training_loss_avg:0.684115
Epoch:4 Step:264 Training_loss:0.626975 Training_loss_avg:0.681821
Epoch:4 Step:272 Training_loss:0.705615 Training_loss_avg:0.682292
Epoch:4 Step:280 Training_loss:0.606296 Training_loss_avg:0.682632
Epoch:4 Step:288 Training_loss:0.771708 Training_loss_avg:0.684818
Validating:


52it [00:07,  6.59it/s]


Epoch:4 Step:288 Val_loss:0.681521
Epoch:4 Step:296 Training_loss:0.723856 Training_loss_avg:0.686860
Epoch:4 Step:304 Training_loss:0.645000 Training_loss_avg:0.686405
Epoch:4 Step:312 Training_loss:0.767481 Training_loss_avg:0.688855
Epoch:4 Step:320 Training_loss:0.755790 Training_loss_avg:0.689557
Epoch:4 Step:328 Training_loss:0.762648 Training_loss_avg:0.692161
Epoch:4 Step:336 Training_loss:0.677028 Training_loss_avg:0.690899
Epoch:4 Step:344 Training_loss:0.678129 Training_loss_avg:0.690830
Epoch:4 Step:352 Training_loss:0.764608 Training_loss_avg:0.695153
Epoch:4 Step:360 Training_loss:0.637282 Training_loss_avg:0.692284
Epoch:4 Step:368 Training_loss:0.702447 Training_loss_avg:0.691886
Epoch:4 Step:376 Training_loss:0.700623 Training_loss_avg:0.692465
Epoch:4 Step:384 Training_loss:0.682513 Training_loss_avg:0.692454
Validating:


52it [00:07,  6.60it/s]


Epoch:4 Step:384 Val_loss:0.683243
Epoch:4 Step:392 Training_loss:0.695410 Training_loss_avg:0.689937
Epoch:4 Step:400 Training_loss:0.686846 Training_loss_avg:0.687093
Epoch:4 Step:408 Training_loss:0.716389 Training_loss_avg:0.687115
Epoch:4 Step:416 Training_loss:0.714067 Training_loss_avg:0.686687
Epoch:4 Step:424 Training_loss:0.660435 Training_loss_avg:0.686821
Epoch:4 Step:432 Training_loss:0.665015 Training_loss_avg:0.689453
Epoch:4 Step:440 Training_loss:0.692723 Training_loss_avg:0.688560
Epoch:4 Step:448 Training_loss:0.733249 Training_loss_avg:0.689616
Epoch:4 Step:456 Training_loss:0.709945 Training_loss_avg:0.689219
Epoch:4 Step:464 Training_loss:0.717314 Training_loss_avg:0.689303
Epoch:4 Step:472 Training_loss:0.670281 Training_loss_avg:0.690193
Epoch:4 Step:480 Training_loss:0.720097 Training_loss_avg:0.693425
Validating:


52it [00:07,  6.59it/s]


Epoch:4 Step:480 Val_loss:0.687019
Epoch:4 Step:488 Training_loss:0.689572 Training_loss_avg:0.693868
Epoch:4 Step:496 Training_loss:0.664446 Training_loss_avg:0.694391
Epoch:4 Step:504 Training_loss:0.721265 Training_loss_avg:0.695060
Epoch:4 Step:512 Training_loss:0.702068 Training_loss_avg:0.694913
Epoch:4 Step:520 Training_loss:0.685563 Training_loss_avg:0.693665
Epoch:4 Step:528 Training_loss:0.702794 Training_loss_avg:0.693875
Epoch:4 Step:536 Training_loss:0.657266 Training_loss_avg:0.694372
Epoch:4 Step:544 Training_loss:0.686901 Training_loss_avg:0.694196
Epoch:4 Step:552 Training_loss:0.676705 Training_loss_avg:0.695427
Epoch:4 Step:560 Training_loss:0.674696 Training_loss_avg:0.694620
Epoch:4 Step:568 Training_loss:0.692717 Training_loss_avg:0.694302
Epoch:4 Step:576 Training_loss:0.671730 Training_loss_avg:0.693453
Validating:


52it [00:07,  6.60it/s]


Epoch:4 Step:576 Val_loss:0.689680
Epoch:4 Step:584 Training_loss:0.677731 Training_loss_avg:0.693895
Epoch:4 Step:592 Training_loss:0.676809 Training_loss_avg:0.692545
Epoch:4 Step:600 Training_loss:0.705144 Training_loss_avg:0.692746
Epoch:4 Step:608 Training_loss:0.692354 Training_loss_avg:0.692115
Epoch:4 Step:616 Training_loss:0.699723 Training_loss_avg:0.693202
Epoch:4 Step:624 Training_loss:0.733592 Training_loss_avg:0.695323
Epoch:4 Step:632 Training_loss:0.670269 Training_loss_avg:0.695077
Epoch:4 Step:640 Training_loss:0.707734 Training_loss_avg:0.696070
Epoch:4 Step:648 Training_loss:0.691073 Training_loss_avg:0.696181
Epoch:4 Step:656 Training_loss:0.703380 Training_loss_avg:0.695466
Epoch:4 Step:664 Training_loss:0.710706 Training_loss_avg:0.697141
Epoch:4 Step:672 Training_loss:0.716207 Training_loss_avg:0.697352
Validating:


52it [00:07,  6.59it/s]


Epoch:4 Step:672 Val_loss:0.691280
Epoch:4 Step:680 Training_loss:0.687100 Training_loss_avg:0.698969
Epoch:4 Step:688 Training_loss:0.737381 Training_loss_avg:0.698282
Epoch:4 Step:696 Training_loss:0.714703 Training_loss_avg:0.698099
Epoch:4 Step:704 Training_loss:0.694952 Training_loss_avg:0.699098
Epoch:4 Step:712 Training_loss:0.686599 Training_loss_avg:0.697480
Epoch:4 Step:720 Training_loss:0.695975 Training_loss_avg:0.696284
Epoch:4 Step:728 Training_loss:0.697019 Training_loss_avg:0.694971
Epoch:4 Step:736 Training_loss:0.696693 Training_loss_avg:0.695365
Epoch:4 Step:744 Training_loss:0.691428 Training_loss_avg:0.695631
Epoch:4 Step:752 Training_loss:0.703414 Training_loss_avg:0.694407
Epoch:4 Step:760 Training_loss:0.692734 Training_loss_avg:0.695516
Epoch:4 Step:768 Training_loss:0.639077 Training_loss_avg:0.694248
Validating:


52it [00:07,  6.60it/s]


Epoch:4 Step:768 Val_loss:0.691942
Epoch:4 Step:776 Training_loss:0.705298 Training_loss_avg:0.694342
Epoch:4 Step:784 Training_loss:0.677488 Training_loss_avg:0.694241
Epoch:4 Step:792 Training_loss:0.661562 Training_loss_avg:0.693565
Epoch:4 Step:800 Training_loss:0.678132 Training_loss_avg:0.693390
Epoch:4 Step:808 Training_loss:0.700740 Training_loss_avg:0.693077
Epoch:4 Step:816 Training_loss:0.713608 Training_loss_avg:0.693068
Epoch:4 Step:824 Training_loss:0.692392 Training_loss_avg:0.693707
Epoch:4 Step:832 Training_loss:0.677691 Training_loss_avg:0.693961
Epoch:4 Step:840 Training_loss:0.705876 Training_loss_avg:0.694224
Epoch:4 Step:848 Training_loss:0.690992 Training_loss_avg:0.693379
Epoch:4 Step:856 Training_loss:0.694461 Training_loss_avg:0.693069
Epoch:4 Step:864 Training_loss:0.720083 Training_loss_avg:0.693124
Validating:


52it [00:07,  6.59it/s]


Epoch:4 Step:864 Val_loss:0.688846
Epoch:4 Step:872 Training_loss:0.673260 Training_loss_avg:0.693184
Epoch:4 Step:880 Training_loss:0.724970 Training_loss_avg:0.693281
Epoch:4 Step:888 Training_loss:0.720510 Training_loss_avg:0.693900
Epoch:4 Step:896 Training_loss:0.708295 Training_loss_avg:0.694777
Epoch:4 Step:904 Training_loss:0.630806 Training_loss_avg:0.692968
Epoch:4 Step:912 Training_loss:0.739062 Training_loss_avg:0.693708
Epoch:4 Step:920 Training_loss:0.754779 Training_loss_avg:0.695092
Epoch:4 Step:928 Training_loss:0.724591 Training_loss_avg:0.695528
Epoch:4 Step:936 Training_loss:0.685222 Training_loss_avg:0.696087
Epoch:4 Step:944 Training_loss:0.713298 Training_loss_avg:0.696615
Epoch:4 Step:952 Training_loss:0.715566 Training_loss_avg:0.697392
Epoch:4 Step:960 Training_loss:0.701819 Training_loss_avg:0.697935
Validating:


52it [00:07,  6.59it/s]


Epoch:4 Step:960 Val_loss:0.684900
Epoch:4 Step:968 Training_loss:0.678709 Training_loss_avg:0.697655
Epoch:4 Step:976 Training_loss:0.704376 Training_loss_avg:0.698308
Epoch:4 Step:984 Training_loss:0.652234 Training_loss_avg:0.697798
Epoch:4 Step:992 Training_loss:0.685495 Training_loss_avg:0.697971
Epoch:4 Step:1000 Training_loss:0.683226 Training_loss_avg:0.697533
Epoch:4 Step:1008 Training_loss:0.612231 Training_loss_avg:0.695931
Epoch:4 Step:1016 Training_loss:0.734372 Training_loss_avg:0.696624
Epoch:4 Step:1024 Training_loss:0.640718 Training_loss_avg:0.694766
Epoch:4 Step:1032 Training_loss:0.620769 Training_loss_avg:0.693776
Epoch:4 Step:1040 Training_loss:0.634211 Training_loss_avg:0.692306
Epoch:4 Step:1048 Training_loss:0.665381 Training_loss_avg:0.691792
Epoch:4 Step:1056 Training_loss:0.573891 Training_loss_avg:0.689202
Validating:


52it [00:07,  6.59it/s]


Epoch:4 Step:1056 Val_loss:0.680166
Epoch:4 Step:1064 Training_loss:0.707257 Training_loss_avg:0.689133
Epoch:4 Step:1072 Training_loss:0.691331 Training_loss_avg:0.688636
Epoch:4 Step:1080 Training_loss:0.760301 Training_loss_avg:0.690100
Epoch:4 Step:1088 Training_loss:0.597116 Training_loss_avg:0.687294
Epoch:4 Step:1096 Training_loss:0.723836 Training_loss_avg:0.687477
Epoch:4 Step:1104 Training_loss:0.650028 Training_loss_avg:0.686578
Epoch:4 Step:1112 Training_loss:0.772706 Training_loss_avg:0.688301
Epoch:4 Step:1120 Training_loss:0.630037 Training_loss_avg:0.686982
Epoch:4 Step:1128 Training_loss:0.601022 Training_loss_avg:0.685062
Epoch:4 Step:1136 Training_loss:0.775837 Training_loss_avg:0.686645
Epoch:4 Step:1144 Training_loss:0.787839 Training_loss_avg:0.688573
Epoch:4 Step:1152 Training_loss:0.588596 Training_loss_avg:0.686277
Validating:


52it [00:07,  6.59it/s]


Epoch:4 Step:1152 Val_loss:0.681711
Epoch:4 Step:1160 Training_loss:0.723916 Training_loss_avg:0.686900
Epoch:4 Step:1168 Training_loss:0.821857 Training_loss_avg:0.690556
Epoch:4 Step:1176 Training_loss:0.847168 Training_loss_avg:0.693393
Epoch:4 Step:1184 Training_loss:0.774330 Training_loss_avg:0.695330
Epoch:4 Step:1192 Training_loss:0.744640 Training_loss_avg:0.696992
Epoch:4 Step:1200 Training_loss:0.744075 Training_loss_avg:0.698311
Epoch:4 Step:1208 Training_loss:0.689179 Training_loss_avg:0.698079
Epoch:4 Step:1216 Training_loss:0.671148 Training_loss_avg:0.697230
Epoch:4 Step:1224 Training_loss:0.658241 Training_loss_avg:0.696547
Epoch:4 Step:1232 Training_loss:0.701292 Training_loss_avg:0.697019
Epoch:4 Step:1240 Training_loss:0.703423 Training_loss_avg:0.696970
Epoch:4 Step:1248 Training_loss:0.682720 Training_loss_avg:0.696805
Validating:


52it [00:07,  6.60it/s]


Epoch:4 Step:1248 Val_loss:0.685300
Epoch:4 Step:1256 Training_loss:0.682512 Training_loss_avg:0.696566
Epoch:4 Step:1264 Training_loss:0.679209 Training_loss_avg:0.695748
Epoch:4 Step:1272 Training_loss:0.701484 Training_loss_avg:0.696313
Epoch:4 Step:1280 Training_loss:0.686323 Training_loss_avg:0.695540
Epoch:4 Step:1288 Training_loss:0.677019 Training_loss_avg:0.694670
Epoch:4 Step:1296 Training_loss:0.734112 Training_loss_avg:0.695186
Epoch:4 Step:1304 Training_loss:0.692099 Training_loss_avg:0.696412
Epoch:4 Step:1312 Training_loss:0.702680 Training_loss_avg:0.695684
Epoch:4 Step:1320 Training_loss:0.698449 Training_loss_avg:0.694558
Epoch:4 Step:1328 Training_loss:0.688548 Training_loss_avg:0.693837
Epoch:4 Step:1336 Training_loss:0.650859 Training_loss_avg:0.693150
Epoch:4 Step:1344 Training_loss:0.692611 Training_loss_avg:0.692736
Validating:


52it [00:07,  6.59it/s]


Epoch:4 Step:1344 Val_loss:0.688609
Epoch:4 Step:1352 Training_loss:0.717709 Training_loss_avg:0.692779
Epoch:4 Step:1360 Training_loss:0.705200 Training_loss_avg:0.692847
Epoch:4 Step:1368 Training_loss:0.701652 Training_loss_avg:0.693305
Epoch:4 Step:1376 Training_loss:0.713071 Training_loss_avg:0.693479
Epoch:4 Step:1384 Training_loss:0.686415 Training_loss_avg:0.694163
Epoch:4 Step:1392 Training_loss:0.710672 Training_loss_avg:0.694666
Epoch:4 Step:1400 Training_loss:0.681752 Training_loss_avg:0.694637
Epoch:4 Step:1408 Training_loss:0.681549 Training_loss_avg:0.696023
Epoch:4 Step:1416 Training_loss:0.700107 Training_loss_avg:0.695338
Epoch:4 Step:1424 Training_loss:0.694247 Training_loss_avg:0.696409
Epoch:4 Step:1432 Training_loss:0.695052 Training_loss_avg:0.697894
Epoch:4 Step:1440 Training_loss:0.736251 Training_loss_avg:0.699935
Validating:


52it [00:07,  6.59it/s]


Epoch:4 Step:1440 Val_loss:0.696050
Epoch:4 Step:1448 Training_loss:0.677160 Training_loss_avg:0.700171
Epoch:4 Step:1456 Training_loss:0.715080 Training_loss_avg:0.702994
Epoch:4 Step:1464 Training_loss:0.728733 Training_loss_avg:0.703424
Epoch:4 Step:1472 Training_loss:0.669139 Training_loss_avg:0.702980
Epoch:4 Step:1480 Training_loss:0.727477 Training_loss_avg:0.702324
Epoch:4 Step:1488 Training_loss:0.709489 Training_loss_avg:0.704571
Epoch:4 Step:1496 Training_loss:0.701183 Training_loss_avg:0.704118
Epoch:4 Step:1504 Training_loss:0.662120 Training_loss_avg:0.704360
Epoch:4 Step:1512 Training_loss:0.701353 Training_loss_avg:0.702933
Epoch:4 Step:1520 Training_loss:0.718207 Training_loss_avg:0.704696
Epoch:4 Step:1528 Training_loss:0.727794 Training_loss_avg:0.707232
Epoch:4 Step:1536 Training_loss:0.701878 Training_loss_avg:0.705752
Validating:


52it [00:07,  6.59it/s]


Epoch:4 Step:1536 Val_loss:0.697507
Epoch:4 Step:1544 Training_loss:0.710089 Training_loss_avg:0.704197
Epoch:4 Step:1552 Training_loss:0.697890 Training_loss_avg:0.706383
Epoch:4 Step:1560 Training_loss:0.667142 Training_loss_avg:0.705248
Epoch:4 Step:1568 Training_loss:0.705000 Training_loss_avg:0.702911
Epoch:4 Step:1576 Training_loss:0.722924 Training_loss_avg:0.700426
Epoch:4 Step:1584 Training_loss:0.673496 Training_loss_avg:0.698409
Epoch:4 Step:1592 Training_loss:0.690468 Training_loss_avg:0.697326
Epoch:4 Step:1600 Training_loss:0.736567 Training_loss_avg:0.697176
Epoch:4 Step:1608 Training_loss:0.680671 Training_loss_avg:0.697005
Epoch:4 Step:1616 Training_loss:0.679397 Training_loss_avg:0.697170
Epoch:4 Step:1624 Training_loss:0.673752 Training_loss_avg:0.697481
Epoch:4 Step:1632 Training_loss:0.702309 Training_loss_avg:0.697501
Validating:


52it [00:07,  6.59it/s]


Epoch:4 Step:1632 Val_loss:0.693232
Epoch:4 Step:1640 Training_loss:0.712161 Training_loss_avg:0.697676
Epoch:4 Step:1648 Training_loss:0.687305 Training_loss_avg:0.697767
Epoch:4 Step:1656 Training_loss:0.680854 Training_loss_avg:0.697734
Epoch:4 Step:1664 Training_loss:0.692218 Training_loss_avg:0.697994
Epoch:4 Step:1672 Training_loss:0.678476 Training_loss_avg:0.697534
Epoch:4 Step:1680 Training_loss:0.678995 Training_loss_avg:0.697388
Epoch:4 Step:1688 Training_loss:0.648568 Training_loss_avg:0.696819
Epoch:4 Step:1696 Training_loss:0.727162 Training_loss_avg:0.696680
Epoch:4 Step:1704 Training_loss:0.678111 Training_loss_avg:0.696400
Epoch:4 Step:1712 Training_loss:0.620282 Training_loss_avg:0.694752
Epoch:4 Step:1720 Training_loss:0.736211 Training_loss_avg:0.695507
Epoch:4 Step:1728 Training_loss:0.682726 Training_loss_avg:0.695391
Validating:


52it [00:07,  6.59it/s]


Epoch:4 Step:1728 Val_loss:0.680072
Epoch:4 Step:1736 Training_loss:0.682897 Training_loss_avg:0.696031
Epoch:4 Step:1744 Training_loss:0.731975 Training_loss_avg:0.696819
Epoch:4 Step:1752 Training_loss:0.695742 Training_loss_avg:0.696379
Epoch:4 Step:1760 Training_loss:0.609295 Training_loss_avg:0.694461
Epoch:4 Step:1768 Training_loss:0.796435 Training_loss_avg:0.696357
Epoch:4 Step:1776 Training_loss:0.623020 Training_loss_avg:0.694556
Epoch:4 Step:1784 Training_loss:0.577401 Training_loss_avg:0.692376
Epoch:4 Step:1792 Training_loss:0.655945 Training_loss_avg:0.691281
Epoch:4 Step:1800 Training_loss:0.637131 Training_loss_avg:0.690389
Epoch:4 Step:1808 Training_loss:0.612074 Training_loss_avg:0.688999
Epoch:4 Step:1816 Training_loss:0.609864 Training_loss_avg:0.687194
Epoch:4 Step:1824 Training_loss:0.631605 Training_loss_avg:0.685941
Validating:


52it [00:07,  6.60it/s]


Epoch:4 Step:1824 Val_loss:0.680737
Epoch:4 Step:1832 Training_loss:0.639226 Training_loss_avg:0.684825
Epoch:4 Step:1840 Training_loss:0.648722 Training_loss_avg:0.683074
Epoch:4 Step:1848 Training_loss:0.609042 Training_loss_avg:0.681712
Epoch:4 Step:1856 Training_loss:0.719394 Training_loss_avg:0.681798
Epoch:4 Step:1864 Training_loss:0.630296 Training_loss_avg:0.679830
Epoch:4 Step:1872 Training_loss:0.872407 Training_loss_avg:0.683895
Epoch:4 Step:1880 Training_loss:0.617844 Training_loss_avg:0.681702
Epoch:4 Step:1888 Training_loss:0.860675 Training_loss_avg:0.684726
Epoch:4 Step:1896 Training_loss:0.773863 Training_loss_avg:0.686180
Epoch:4 Step:1904 Training_loss:0.606102 Training_loss_avg:0.685059
Epoch:4 Step:1912 Training_loss:0.698942 Training_loss_avg:0.685011
Epoch:4 Step:1920 Training_loss:0.854002 Training_loss_avg:0.687727
Validating:


52it [00:07,  6.60it/s]


Epoch:4 Step:1920 Val_loss:0.680387
Epoch:4 Step:1928 Training_loss:0.661670 Training_loss_avg:0.686404
Epoch:4 Step:1936 Training_loss:0.724548 Training_loss_avg:0.686858
Epoch:4 Step:1944 Training_loss:0.701000 Training_loss_avg:0.686676
Epoch:4 Step:1952 Training_loss:0.649386 Training_loss_avg:0.685706
Epoch:4 Step:1960 Training_loss:0.739167 Training_loss_avg:0.687146
Epoch:4 Step:1968 Training_loss:0.686562 Training_loss_avg:0.686778
Epoch:4 Step:1976 Training_loss:0.715181 Training_loss_avg:0.686623
Epoch:4 Step:1984 Training_loss:0.594604 Training_loss_avg:0.685045
Epoch:4 Step:1992 Training_loss:0.724551 Training_loss_avg:0.685727
Epoch:4 Step:2000 Training_loss:0.735553 Training_loss_avg:0.685706
Epoch:4 Step:2008 Training_loss:0.628429 Training_loss_avg:0.684661
Epoch:4 Step:2016 Training_loss:0.652685 Training_loss_avg:0.684127
Validating:


52it [00:07,  6.60it/s]


Epoch:4 Step:2016 Val_loss:0.678738
Epoch:4 Step:2024 Training_loss:0.722884 Training_loss_avg:0.685110
Epoch:4 Step:2032 Training_loss:0.662529 Training_loss_avg:0.684314
Epoch:4 Step:2040 Training_loss:0.655532 Training_loss_avg:0.683182
Epoch:4 Step:2048 Training_loss:0.691555 Training_loss_avg:0.683267
Epoch:4 Step:2056 Training_loss:0.697251 Training_loss_avg:0.683595
Epoch:4 Step:2064 Training_loss:0.725263 Training_loss_avg:0.684256
Epoch:4 Step:2072 Training_loss:0.718747 Training_loss_avg:0.685061
Epoch:4 Step:2080 Training_loss:0.681890 Training_loss_avg:0.685119
Epoch:4 Step:2088 Training_loss:0.676598 Training_loss_avg:0.685679
Epoch:4 Step:2096 Training_loss:0.675109 Training_loss_avg:0.684638
Epoch:4 Step:2104 Training_loss:0.665079 Training_loss_avg:0.684378
Epoch:4 Step:2112 Training_loss:0.656968 Training_loss_avg:0.685111
Validating:


52it [00:07,  6.60it/s]


Epoch:4 Step:2112 Val_loss:0.686138
Epoch:4 Step:2120 Training_loss:0.745027 Training_loss_avg:0.685288
Epoch:4 Step:2128 Training_loss:0.728863 Training_loss_avg:0.686211
Epoch:4 Step:2136 Training_loss:0.661978 Training_loss_avg:0.685792
Epoch:4 Step:2144 Training_loss:0.699971 Training_loss_avg:0.685152
Epoch:4 Step:2152 Training_loss:0.703476 Training_loss_avg:0.685307
Epoch:4 Step:2160 Training_loss:0.664580 Training_loss_avg:0.686412
Epoch:4 Step:2168 Training_loss:0.701161 Training_loss_avg:0.684507
Epoch:4 Step:2176 Training_loss:0.697799 Training_loss_avg:0.686003
Epoch:4 Step:2184 Training_loss:0.702770 Training_loss_avg:0.688510
Epoch:4 Step:2192 Training_loss:0.710310 Training_loss_avg:0.689597
Epoch:4 Step:2200 Training_loss:0.649803 Training_loss_avg:0.689851
Epoch:4 Step:2208 Training_loss:0.698739 Training_loss_avg:0.691584
Validating:


52it [00:07,  6.59it/s]


Epoch:4 Step:2208 Val_loss:0.681108
Epoch:4 Step:2216 Training_loss:0.683587 Training_loss_avg:0.693058
Epoch:4 Step:2224 Training_loss:0.692071 Training_loss_avg:0.694268
Epoch:4 Step:2232 Training_loss:0.726754 Training_loss_avg:0.696018
Epoch:4 Step:2240 Training_loss:0.651269 Training_loss_avg:0.696069
Epoch:4 Step:2248 Training_loss:0.655113 Training_loss_avg:0.696991
Epoch:4 Step:2256 Training_loss:0.712122 Training_loss_avg:0.696845
Epoch:4 Step:2264 Training_loss:0.600833 Training_loss_avg:0.696256
Epoch:4 Step:2272 Training_loss:0.800422 Training_loss_avg:0.694816
Epoch:4 Step:2280 Training_loss:0.658746 Training_loss_avg:0.695634
Epoch:4 Step:2288 Training_loss:0.535261 Training_loss_avg:0.689126
Epoch:4 Step:2296 Training_loss:0.577361 Training_loss_avg:0.685196
Epoch:4 Step:2304 Training_loss:0.621241 Training_loss_avg:0.685499
Validating:


52it [00:07,  6.60it/s]


Epoch:4 Step:2304 Val_loss:0.681278
Epoch:4 Step:2312 Training_loss:0.591992 Training_loss_avg:0.683360
Epoch:4 Step:2320 Training_loss:0.699608 Training_loss_avg:0.680272
Epoch:4 Step:2328 Training_loss:0.732822 Training_loss_avg:0.681695
Epoch:4 Step:2336 Training_loss:0.671393 Training_loss_avg:0.680632
Epoch:4 Step:2344 Training_loss:0.632413 Training_loss_avg:0.679260
Epoch:4 Step:2352 Training_loss:0.573828 Training_loss_avg:0.677749
Epoch:4 Step:2360 Training_loss:0.634515 Training_loss_avg:0.675656
Epoch:4 Step:2368 Training_loss:0.710804 Training_loss_avg:0.676141
Epoch:4 Step:2376 Training_loss:0.703892 Training_loss_avg:0.675915
Epoch:4 Step:2384 Training_loss:0.743662 Training_loss_avg:0.678896
Epoch:4 Step:2392 Training_loss:0.666557 Training_loss_avg:0.677736
Epoch:4 Step:2400 Training_loss:0.521750 Training_loss_avg:0.673460
Validating:


52it [00:07,  6.59it/s]


Epoch:4 Step:2400 Val_loss:0.689214
Epoch:4 Step:2408 Training_loss:0.795439 Training_loss_avg:0.676800
Epoch:4 Step:2416 Training_loss:0.866003 Training_loss_avg:0.681067
Epoch:4 Step:2424 Training_loss:0.719381 Training_loss_avg:0.680997
Epoch:4 Step:2432 Training_loss:0.794404 Training_loss_avg:0.683634
Epoch:4 Step:2440 Training_loss:0.600614 Training_loss_avg:0.682536
Epoch:4 Step:2448 Training_loss:0.700233 Training_loss_avg:0.682709
Epoch:4 Step:2456 Training_loss:0.843934 Training_loss_avg:0.685643
Epoch:4 Step:2464 Training_loss:0.811243 Training_loss_avg:0.687363
Epoch:4 Step:2472 Training_loss:0.751253 Training_loss_avg:0.688013
Epoch:4 Step:2480 Training_loss:0.747904 Training_loss_avg:0.689333
Epoch:4 Step:2488 Training_loss:0.548393 Training_loss_avg:0.686769
Epoch:4 Step:2496 Training_loss:0.726324 Training_loss_avg:0.687793
Validating:


52it [00:07,  6.60it/s]


Epoch:4 Step:2496 Val_loss:0.680805
Epoch:4 Step:2504 Training_loss:0.627517 Training_loss_avg:0.687042
Epoch:4 Step:2512 Training_loss:0.641120 Training_loss_avg:0.686725
Epoch:4 Step:2520 Training_loss:0.600156 Training_loss_avg:0.683828
Epoch:4 Step:2528 Training_loss:0.699676 Training_loss_avg:0.683244
Epoch:4 Step:2536 Training_loss:0.673230 Training_loss_avg:0.683469
Epoch:4 Step:2544 Training_loss:0.602466 Training_loss_avg:0.681519
Epoch:4 Step:2552 Training_loss:0.603834 Training_loss_avg:0.679526
Epoch:4 Step:2560 Training_loss:0.723827 Training_loss_avg:0.680711
Epoch:4 Step:2568 Training_loss:0.705862 Training_loss_avg:0.680805
Epoch:4 Step:2576 Training_loss:0.701184 Training_loss_avg:0.680873
Epoch:4 Step:2584 Training_loss:0.730497 Training_loss_avg:0.681427
Epoch:4 Step:2592 Training_loss:0.721309 Training_loss_avg:0.681647
Validating:


52it [00:07,  6.60it/s]


Epoch:4 Step:2592 Val_loss:0.680709
Epoch:4 Step:2600 Training_loss:0.777292 Training_loss_avg:0.684197
Epoch:4 Step:2608 Training_loss:0.597385 Training_loss_avg:0.682170
Epoch:4 Step:2616 Training_loss:0.627446 Training_loss_avg:0.681047
Epoch:4 Step:2624 Training_loss:0.690751 Training_loss_avg:0.681021
Epoch:4 Step:2632 Training_loss:0.675538 Training_loss_avg:0.679996
Epoch:4 Step:2640 Training_loss:0.532430 Training_loss_avg:0.677620
Epoch:4 Step:2648 Training_loss:0.620652 Training_loss_avg:0.676930
Epoch:4 Step:2656 Training_loss:0.663985 Training_loss_avg:0.675968
Epoch:4 Step:2664 Training_loss:0.624416 Training_loss_avg:0.676439
Epoch:4 Step:2672 Training_loss:0.639104 Training_loss_avg:0.673213
Epoch:4 Step:2680 Training_loss:0.682600 Training_loss_avg:0.673690
Epoch:4 Step:2688 Training_loss:0.603576 Training_loss_avg:0.675056
Validating:


52it [00:07,  6.59it/s]


Epoch:4 Step:2688 Val_loss:0.682386
Epoch:4 Step:2696 Training_loss:0.564870 Training_loss_avg:0.674806
Epoch:4 Step:2704 Training_loss:0.804550 Training_loss_avg:0.678473
Epoch:4 Step:2712 Training_loss:0.752997 Training_loss_avg:0.681693
Epoch:4 Step:2720 Training_loss:0.695170 Training_loss_avg:0.681604
Epoch:4 Step:2728 Training_loss:0.541710 Training_loss_avg:0.677782
Epoch:4 Step:2736 Training_loss:0.744678 Training_loss_avg:0.679247
Epoch:4 Step:2744 Training_loss:0.657311 Training_loss_avg:0.679745
Epoch:4 Step:2752 Training_loss:0.746588 Training_loss_avg:0.683201
Epoch:4 Step:2760 Training_loss:0.793794 Training_loss_avg:0.686386
Epoch:4 Step:2768 Training_loss:0.648473 Training_loss_avg:0.685140
Epoch:4 Step:2776 Training_loss:0.755719 Training_loss_avg:0.686176
Epoch:4 Step:2784 Training_loss:0.624060 Training_loss_avg:0.683784
Validating:


52it [00:07,  6.60it/s]


Epoch:4 Step:2784 Val_loss:0.682210
Epoch:4 Step:2792 Training_loss:0.646433 Training_loss_avg:0.683382
Epoch:4 Step:2800 Training_loss:0.753311 Training_loss_avg:0.688013
Epoch:4 Step:2808 Training_loss:0.584845 Training_loss_avg:0.683801
Epoch:4 Step:2816 Training_loss:0.722856 Training_loss_avg:0.680938
Epoch:4 Step:2824 Training_loss:0.626789 Training_loss_avg:0.679086
Epoch:4 Step:2832 Training_loss:0.688504 Training_loss_avg:0.676968
Epoch:4 Step:2840 Training_loss:0.614184 Training_loss_avg:0.677240
Epoch:4 Step:2848 Training_loss:0.740340 Training_loss_avg:0.678042
Epoch:4 Step:2856 Training_loss:0.739404 Training_loss_avg:0.675951
Epoch:4 Step:2864 Training_loss:0.742944 Training_loss_avg:0.674585
Epoch:4 Step:2872 Training_loss:0.793494 Training_loss_avg:0.675430
Epoch:4 Step:2880 Training_loss:0.652540 Training_loss_avg:0.673523
Validating:


52it [00:07,  6.59it/s]


Epoch:4 Step:2880 Val_loss:0.678981
Epoch:4 Step:2888 Training_loss:0.649306 Training_loss_avg:0.675541
Epoch:4 Step:2896 Training_loss:0.747496 Training_loss_avg:0.675964
Epoch:4 Step:2904 Training_loss:0.691570 Training_loss_avg:0.677245
Epoch:4 Step:2912 Training_loss:0.657117 Training_loss_avg:0.677565
Epoch:4 Step:2920 Training_loss:0.594052 Training_loss_avg:0.677443
Epoch:4 Step:2928 Training_loss:0.737494 Training_loss_avg:0.678200
Epoch:4 Step:2936 Training_loss:0.745602 Training_loss_avg:0.679647
Epoch:4 Step:2944 Training_loss:0.655921 Training_loss_avg:0.680716
Epoch:4 Step:2952 Training_loss:0.701560 Training_loss_avg:0.682671
Epoch:4 Step:2960 Training_loss:0.720030 Training_loss_avg:0.682595
Epoch:4 Step:2968 Training_loss:0.671938 Training_loss_avg:0.681916
Epoch:4 Step:2976 Training_loss:0.648200 Training_loss_avg:0.680857
Validating:


52it [00:07,  6.60it/s]


Epoch:4 Step:2976 Val_loss:0.680875
Epoch:4 Step:2984 Training_loss:0.736502 Training_loss_avg:0.680977
Epoch:4 Step:2992 Training_loss:0.677118 Training_loss_avg:0.680093
Epoch:4 Step:3000 Training_loss:0.688547 Training_loss_avg:0.678318
Epoch:4 Step:3008 Training_loss:0.759970 Training_loss_avg:0.681570
Epoch:4 Step:3016 Training_loss:0.670491 Training_loss_avg:0.682431
Epoch:4 Step:3024 Training_loss:0.735560 Training_loss_avg:0.683327
Epoch:4 Step:3032 Training_loss:0.693576 Training_loss_avg:0.683688
Epoch:4 Step:3040 Training_loss:0.705599 Training_loss_avg:0.687151
Epoch:4 Step:3048 Training_loss:0.693567 Training_loss_avg:0.688609
Epoch:4 Step:3056 Training_loss:0.710334 Training_loss_avg:0.689536
Epoch:4 Step:3064 Training_loss:0.674133 Training_loss_avg:0.690531
Epoch:4 Step:3072 Training_loss:0.718728 Training_loss_avg:0.692123
Validating:


52it [00:07,  6.60it/s]


Epoch:4 Step:3072 Val_loss:0.683647
Epoch:4 Step:3080 Training_loss:0.706132 Training_loss_avg:0.692594
Epoch:4 Step:3088 Training_loss:0.686601 Training_loss_avg:0.694254
Epoch:4 Step:3096 Training_loss:0.693783 Training_loss_avg:0.696832
Epoch:4 Step:3104 Training_loss:0.696031 Training_loss_avg:0.694662
Epoch:4 Step:3112 Training_loss:0.706606 Training_loss_avg:0.693734
Epoch:4 Step:3120 Training_loss:0.683911 Training_loss_avg:0.693509
Epoch:4 Step:3128 Training_loss:0.695375 Training_loss_avg:0.696582
Epoch:4 Step:3136 Training_loss:0.645151 Training_loss_avg:0.694592
Epoch:4 Step:3144 Training_loss:0.684842 Training_loss_avg:0.695142
Epoch:4 Step:3152 Training_loss:0.679751 Training_loss_avg:0.693806
Epoch:4 Step:3160 Training_loss:0.710617 Training_loss_avg:0.692142
Epoch:4 Step:3168 Training_loss:0.660274 Training_loss_avg:0.692378
Validating:


52it [00:07,  6.59it/s]


Epoch:4 Step:3168 Val_loss:0.681969
Epoch:4 Step:3176 Training_loss:0.696548 Training_loss_avg:0.691195
Epoch:4 Step:3184 Training_loss:0.712658 Training_loss_avg:0.692967
Epoch:4 Step:3192 Training_loss:0.699628 Training_loss_avg:0.694031
Epoch:4 Step:3200 Training_loss:0.710612 Training_loss_avg:0.693177
Epoch:4 Step:3208 Training_loss:0.678805 Training_loss_avg:0.695056
Epoch:4 Step:3216 Training_loss:0.583823 Training_loss_avg:0.692275
Epoch:4 Step:3224 Training_loss:0.684445 Training_loss_avg:0.693428
Epoch:4 Step:3232 Training_loss:0.727628 Training_loss_avg:0.694211
Epoch:4 Step:3240 Training_loss:0.653907 Training_loss_avg:0.695005
Epoch:4 Step:3248 Training_loss:0.685374 Training_loss_avg:0.693906
Epoch:4 Step:3256 Training_loss:0.716797 Training_loss_avg:0.693454
Epoch:4 Step:3264 Training_loss:0.672171 Training_loss_avg:0.692038
Validating:


52it [00:07,  6.60it/s]


Epoch:4 Step:3264 Val_loss:0.681435
Epoch:4 Step:3272 Training_loss:0.729465 Training_loss_avg:0.690758
Epoch:4 Step:3280 Training_loss:0.601688 Training_loss_avg:0.689741
Epoch:4 Step:3288 Training_loss:0.667956 Training_loss_avg:0.690114
Epoch:4 Step:3296 Training_loss:0.606901 Training_loss_avg:0.687302
Epoch:4 Step:3304 Training_loss:0.645042 Training_loss_avg:0.686371
Epoch:4 Step:3312 Training_loss:0.563096 Training_loss_avg:0.684491
Epoch:4 Step:3320 Training_loss:0.593096 Training_loss_avg:0.684472
Epoch:4 Step:3328 Training_loss:0.688382 Training_loss_avg:0.683489


In [None]:
def main():
    """
    Main configuration function for a given finetune run
    :return: None
    """

    run_name = "test_colab_GPU_highRAM_8_lr_5e-6"
    model_name = 'codebert-base'
    checkpoint_location = None
    online = False

    code_df = preprocess_data(file_loc='code_dataset.jsonl')
    train_data, val_data, test_data = tokenize(code_df, model_name=model_name)

    # Loading model from checkpoint if location provided
    if online:
        model = AutoModelForSequenceClassification.from_pretrained("microsoft/codebert-base")
    elif checkpoint_location is None:
        model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)
    else:
        model = AutoModelForSequenceClassification.from_pretrained(checkpoint_location)

    train(model=model,
          train_data=train_data,
          val_data=val_data,
          epochs=5,
          batch_size=8,
          learning_rate=5e-6,
          validate_per=250,
          run_name=run_name,
          run_descrption="Colab with highRam, lr=5e-6, validate per 259, batch 8")


In [None]:
gc.collect()
torch.cuda.empty_cache()
main()

Insecure code counts: 3729, Total code counts: 8000, Proportion 0.466125


Some weights of the model checkpoint at codebert-base were not used when initializing RobertaForSequenceClassification: ['pooler.dense.bias', 'pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at codebert-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for p

Epoch:0 Step:0 Training_loss:0.663632 Training_loss_avg:0.663632
Validating:


52it [00:07,  6.59it/s]


Epoch:0 Step:0 Val_loss:0.680914
Epoch:0 Step:8 Training_loss:0.675106 Training_loss_avg:0.669369
Epoch:0 Step:16 Training_loss:0.631563 Training_loss_avg:0.656767
Epoch:0 Step:24 Training_loss:0.692088 Training_loss_avg:0.665597
Epoch:0 Step:32 Training_loss:0.855427 Training_loss_avg:0.703563
Epoch:0 Step:40 Training_loss:0.707916 Training_loss_avg:0.704289
Epoch:0 Step:48 Training_loss:0.710373 Training_loss_avg:0.705158
Epoch:0 Step:56 Training_loss:0.650410 Training_loss_avg:0.698314
Epoch:0 Step:64 Training_loss:0.708048 Training_loss_avg:0.699396
Epoch:0 Step:72 Training_loss:0.679085 Training_loss_avg:0.697365
Epoch:0 Step:80 Training_loss:0.691223 Training_loss_avg:0.696806
Epoch:0 Step:88 Training_loss:0.628523 Training_loss_avg:0.691116
Epoch:0 Step:96 Training_loss:0.610138 Training_loss_avg:0.684887
Epoch:0 Step:104 Training_loss:0.697706 Training_loss_avg:0.685803
Epoch:0 Step:112 Training_loss:0.663660 Training_loss_avg:0.684327
Epoch:0 Step:120 Training_loss:0.744097 Tr

52it [00:07,  6.59it/s]


Epoch:0 Step:248 Val_loss:0.695354
Epoch:0 Step:256 Training_loss:0.692415 Training_loss_avg:0.701562
Epoch:0 Step:264 Training_loss:0.715929 Training_loss_avg:0.701984
Epoch:0 Step:272 Training_loss:0.671173 Training_loss_avg:0.701104
Epoch:0 Step:280 Training_loss:0.639179 Training_loss_avg:0.699384
Epoch:0 Step:288 Training_loss:0.684701 Training_loss_avg:0.698987
Epoch:0 Step:296 Training_loss:0.676312 Training_loss_avg:0.698390
Epoch:0 Step:304 Training_loss:0.684746 Training_loss_avg:0.698040
Epoch:0 Step:312 Training_loss:0.697014 Training_loss_avg:0.698015
Epoch:0 Step:320 Training_loss:0.687047 Training_loss_avg:0.697747
Epoch:0 Step:328 Training_loss:0.640855 Training_loss_avg:0.696393
Epoch:0 Step:336 Training_loss:0.673093 Training_loss_avg:0.695851
Epoch:0 Step:344 Training_loss:0.723470 Training_loss_avg:0.696478
Epoch:0 Step:352 Training_loss:0.660520 Training_loss_avg:0.695679
Epoch:0 Step:360 Training_loss:0.700552 Training_loss_avg:0.695785
Epoch:0 Step:368 Training_l

52it [00:07,  6.59it/s]


Epoch:0 Step:496 Val_loss:0.679325
Epoch:0 Step:504 Training_loss:0.681282 Training_loss_avg:0.697077
Epoch:0 Step:512 Training_loss:0.735174 Training_loss_avg:0.698507
Epoch:0 Step:520 Training_loss:0.625085 Training_loss_avg:0.696127
Epoch:0 Step:528 Training_loss:0.642361 Training_loss_avg:0.694839
Epoch:0 Step:536 Training_loss:0.666569 Training_loss_avg:0.693734
Epoch:0 Step:544 Training_loss:0.790315 Training_loss_avg:0.694574
Epoch:0 Step:552 Training_loss:0.606446 Training_loss_avg:0.692031
Epoch:0 Step:560 Training_loss:0.739076 Training_loss_avg:0.692015
Epoch:0 Step:568 Training_loss:0.691895 Training_loss_avg:0.691648
Epoch:0 Step:576 Training_loss:0.684330 Training_loss_avg:0.691212
Epoch:0 Step:584 Training_loss:0.729346 Training_loss_avg:0.692000
Epoch:0 Step:592 Training_loss:0.701372 Training_loss_avg:0.691971
Epoch:0 Step:600 Training_loss:0.690505 Training_loss_avg:0.691397
Epoch:0 Step:608 Training_loss:0.711538 Training_loss_avg:0.690306
Epoch:0 Step:616 Training_l

52it [00:07,  6.59it/s]


Epoch:0 Step:744 Val_loss:0.677549
Epoch:0 Step:752 Training_loss:0.783427 Training_loss_avg:0.690521
Epoch:0 Step:760 Training_loss:0.685764 Training_loss_avg:0.690226
Epoch:0 Step:768 Training_loss:0.585451 Training_loss_avg:0.688213
Epoch:0 Step:776 Training_loss:0.691503 Training_loss_avg:0.687873
Epoch:0 Step:784 Training_loss:0.684516 Training_loss_avg:0.687909
Epoch:0 Step:792 Training_loss:0.737905 Training_loss_avg:0.690729
Epoch:0 Step:800 Training_loss:0.706880 Training_loss_avg:0.691574
Epoch:0 Step:808 Training_loss:0.707439 Training_loss_avg:0.690643
Epoch:0 Step:816 Training_loss:0.775093 Training_loss_avg:0.694988
Epoch:0 Step:824 Training_loss:0.813372 Training_loss_avg:0.697592
Epoch:0 Step:832 Training_loss:0.663568 Training_loss_avg:0.696248
Epoch:0 Step:840 Training_loss:0.643459 Training_loss_avg:0.696565
Epoch:0 Step:848 Training_loss:0.669135 Training_loss_avg:0.691789
Epoch:0 Step:856 Training_loss:0.658796 Training_loss_avg:0.692988
Epoch:0 Step:864 Training_l

52it [00:07,  6.59it/s]


Epoch:0 Step:992 Val_loss:0.680717
Epoch:0 Step:1000 Training_loss:0.690260 Training_loss_avg:0.691434
Epoch:0 Step:1008 Training_loss:0.749213 Training_loss_avg:0.692187
Epoch:0 Step:1016 Training_loss:0.701812 Training_loss_avg:0.694102
Epoch:0 Step:1024 Training_loss:0.698803 Training_loss_avg:0.695332
Epoch:0 Step:1032 Training_loss:0.623302 Training_loss_avg:0.694859
Epoch:0 Step:1040 Training_loss:0.688232 Training_loss_avg:0.694941
Epoch:0 Step:1048 Training_loss:0.697662 Training_loss_avg:0.692513
Epoch:0 Step:1056 Training_loss:0.722672 Training_loss_avg:0.694079
Epoch:0 Step:1064 Training_loss:0.681129 Training_loss_avg:0.695198
Epoch:0 Step:1072 Training_loss:0.689582 Training_loss_avg:0.696270
Epoch:0 Step:1080 Training_loss:0.714081 Training_loss_avg:0.694980
Epoch:0 Step:1088 Training_loss:0.682169 Training_loss_avg:0.694613
Epoch:0 Step:1096 Training_loss:0.700277 Training_loss_avg:0.694510
Epoch:0 Step:1104 Training_loss:0.611214 Training_loss_avg:0.695105
Epoch:0 Step:

52it [00:07,  6.59it/s]


Epoch:0 Step:1240 Val_loss:0.677076
Epoch:0 Step:1248 Training_loss:0.600073 Training_loss_avg:0.688481
Epoch:0 Step:1256 Training_loss:0.779287 Training_loss_avg:0.690891
Epoch:0 Step:1264 Training_loss:0.655343 Training_loss_avg:0.690863
Epoch:0 Step:1272 Training_loss:0.627733 Training_loss_avg:0.689639
Epoch:0 Step:1280 Training_loss:0.776257 Training_loss_avg:0.690990
Epoch:0 Step:1288 Training_loss:0.621336 Training_loss_avg:0.688335
Epoch:0 Step:1296 Training_loss:0.726642 Training_loss_avg:0.689447
Epoch:0 Step:1304 Training_loss:0.827606 Training_loss_avg:0.692448
Epoch:0 Step:1312 Training_loss:0.673142 Training_loss_avg:0.691646
Epoch:0 Step:1320 Training_loss:0.706496 Training_loss_avg:0.691809
Epoch:0 Step:1328 Training_loss:0.734294 Training_loss_avg:0.692230
Epoch:0 Step:1336 Training_loss:0.695159 Training_loss_avg:0.692602
Epoch:0 Step:1344 Training_loss:0.639590 Training_loss_avg:0.692043
Epoch:0 Step:1352 Training_loss:0.730319 Training_loss_avg:0.692902
Epoch:0 Step

52it [00:07,  6.59it/s]


Epoch:0 Step:1488 Val_loss:0.692080
Epoch:0 Step:1496 Training_loss:0.690659 Training_loss_avg:0.698212
Epoch:0 Step:1504 Training_loss:0.747384 Training_loss_avg:0.700936
Epoch:0 Step:1512 Training_loss:0.677048 Training_loss_avg:0.699436
Epoch:0 Step:1520 Training_loss:0.711108 Training_loss_avg:0.699571
Epoch:0 Step:1528 Training_loss:0.675046 Training_loss_avg:0.699485
Epoch:0 Step:1536 Training_loss:0.708837 Training_loss_avg:0.699960
Epoch:0 Step:1544 Training_loss:0.695725 Training_loss_avg:0.700063
Epoch:0 Step:1552 Training_loss:0.673783 Training_loss_avg:0.700132
Epoch:0 Step:1560 Training_loss:0.704699 Training_loss_avg:0.702543
Epoch:0 Step:1568 Training_loss:0.722856 Training_loss_avg:0.702710
Epoch:0 Step:1576 Training_loss:0.647111 Training_loss_avg:0.700388
Epoch:0 Step:1584 Training_loss:0.722787 Training_loss_avg:0.703532
Epoch:0 Step:1592 Training_loss:0.708580 Training_loss_avg:0.706026
Epoch:0 Step:1600 Training_loss:0.703324 Training_loss_avg:0.703586
Epoch:0 Step

52it [00:07,  6.60it/s]


Epoch:0 Step:1736 Val_loss:0.679653
Epoch:0 Step:1744 Training_loss:0.768739 Training_loss_avg:0.698506
Epoch:0 Step:1752 Training_loss:0.709580 Training_loss_avg:0.698092
Epoch:0 Step:1760 Training_loss:0.695950 Training_loss_avg:0.697941
Epoch:0 Step:1768 Training_loss:0.661955 Training_loss_avg:0.696612
Epoch:0 Step:1776 Training_loss:0.601390 Training_loss_avg:0.694109
Epoch:0 Step:1784 Training_loss:0.742344 Training_loss_avg:0.694490
Epoch:0 Step:1792 Training_loss:0.669353 Training_loss_avg:0.693579
Epoch:0 Step:1800 Training_loss:0.785289 Training_loss_avg:0.695543
Epoch:0 Step:1808 Training_loss:0.678956 Training_loss_avg:0.695592
Epoch:0 Step:1816 Training_loss:0.758406 Training_loss_avg:0.696870
Epoch:0 Step:1824 Training_loss:0.653069 Training_loss_avg:0.694524
Epoch:0 Step:1832 Training_loss:0.720029 Training_loss_avg:0.695501
Epoch:0 Step:1840 Training_loss:0.693966 Training_loss_avg:0.694890
Epoch:0 Step:1848 Training_loss:0.692480 Training_loss_avg:0.694217
Epoch:0 Step

52it [00:07,  6.59it/s]


Epoch:0 Step:1984 Val_loss:0.683105
Epoch:0 Step:1992 Training_loss:0.657335 Training_loss_avg:0.689089
Epoch:0 Step:2000 Training_loss:0.663642 Training_loss_avg:0.688296
Epoch:0 Step:2008 Training_loss:0.742617 Training_loss_avg:0.689802
Epoch:0 Step:2016 Training_loss:0.658973 Training_loss_avg:0.689483
Epoch:0 Step:2024 Training_loss:0.703238 Training_loss_avg:0.689982
Epoch:0 Step:2032 Training_loss:0.605181 Training_loss_avg:0.687748
Epoch:0 Step:2040 Training_loss:0.727232 Training_loss_avg:0.689180
Epoch:0 Step:2048 Training_loss:0.832815 Training_loss_avg:0.691544
Epoch:0 Step:2056 Training_loss:0.707644 Training_loss_avg:0.692752
Epoch:0 Step:2064 Training_loss:0.696250 Training_loss_avg:0.692575
Epoch:0 Step:2072 Training_loss:0.719448 Training_loss_avg:0.692886
Epoch:0 Step:2080 Training_loss:0.666172 Training_loss_avg:0.692700
Epoch:0 Step:2088 Training_loss:0.659450 Training_loss_avg:0.691726
Epoch:0 Step:2096 Training_loss:0.797216 Training_loss_avg:0.693223
Epoch:0 Step

52it [00:07,  6.59it/s]


Epoch:0 Step:2232 Val_loss:0.689491
Epoch:0 Step:2240 Training_loss:0.681495 Training_loss_avg:0.700929
Epoch:0 Step:2248 Training_loss:0.668633 Training_loss_avg:0.700452
Epoch:0 Step:2256 Training_loss:0.681132 Training_loss_avg:0.701716
Epoch:0 Step:2264 Training_loss:0.696635 Training_loss_avg:0.701084
Epoch:0 Step:2272 Training_loss:0.693585 Training_loss_avg:0.701243
Epoch:0 Step:2280 Training_loss:0.670307 Training_loss_avg:0.700620
Epoch:0 Step:2288 Training_loss:0.681785 Training_loss_avg:0.701067
Epoch:0 Step:2296 Training_loss:0.790909 Training_loss_avg:0.702031
Epoch:0 Step:2304 Training_loss:0.682302 Training_loss_avg:0.701981
Epoch:0 Step:2312 Training_loss:0.690996 Training_loss_avg:0.702661
Epoch:0 Step:2320 Training_loss:0.708238 Training_loss_avg:0.703060
Epoch:0 Step:2328 Training_loss:0.620932 Training_loss_avg:0.701905
Epoch:0 Step:2336 Training_loss:0.694153 Training_loss_avg:0.702527
Epoch:0 Step:2344 Training_loss:0.812623 Training_loss_avg:0.704453
Epoch:0 Step

52it [00:07,  6.59it/s]


Epoch:0 Step:2480 Val_loss:0.678045
Epoch:0 Step:2488 Training_loss:0.679885 Training_loss_avg:0.704568
Epoch:0 Step:2496 Training_loss:0.655710 Training_loss_avg:0.701738
Epoch:0 Step:2504 Training_loss:0.613158 Training_loss_avg:0.699060
Epoch:0 Step:2512 Training_loss:0.621824 Training_loss_avg:0.696568
Epoch:0 Step:2520 Training_loss:0.617895 Training_loss_avg:0.695661
Epoch:0 Step:2528 Training_loss:0.694842 Training_loss_avg:0.695906
Epoch:0 Step:2536 Training_loss:0.689545 Training_loss_avg:0.694572
Epoch:0 Step:2544 Training_loss:0.827327 Training_loss_avg:0.696191
Epoch:0 Step:2552 Training_loss:0.696204 Training_loss_avg:0.695452
Epoch:0 Step:2560 Training_loss:0.750035 Training_loss_avg:0.696642
Epoch:0 Step:2568 Training_loss:0.699047 Training_loss_avg:0.696452
Epoch:0 Step:2576 Training_loss:0.736966 Training_loss_avg:0.697055
Epoch:0 Step:2584 Training_loss:0.676190 Training_loss_avg:0.697012
Epoch:0 Step:2592 Training_loss:0.770368 Training_loss_avg:0.698748
Epoch:0 Step

52it [00:07,  6.59it/s]


Epoch:0 Step:2728 Val_loss:0.687334
Epoch:0 Step:2736 Training_loss:0.672584 Training_loss_avg:0.695924
Epoch:0 Step:2744 Training_loss:0.620818 Training_loss_avg:0.692088
Epoch:0 Step:2752 Training_loss:0.706992 Training_loss_avg:0.693738
Epoch:0 Step:2760 Training_loss:0.709270 Training_loss_avg:0.694962
Epoch:0 Step:2768 Training_loss:0.739578 Training_loss_avg:0.695805
Epoch:0 Step:2776 Training_loss:0.718028 Training_loss_avg:0.697666
Epoch:0 Step:2784 Training_loss:0.667624 Training_loss_avg:0.697296
Epoch:0 Step:2792 Training_loss:0.734968 Training_loss_avg:0.699147
Epoch:0 Step:2800 Training_loss:0.732747 Training_loss_avg:0.699116
Epoch:0 Step:2808 Training_loss:0.600921 Training_loss_avg:0.698113
Epoch:0 Step:2816 Training_loss:0.622004 Training_loss_avg:0.696434
Epoch:0 Step:2824 Training_loss:0.645699 Training_loss_avg:0.691441
Epoch:0 Step:2832 Training_loss:0.712076 Training_loss_avg:0.693180
Epoch:0 Step:2840 Training_loss:0.753898 Training_loss_avg:0.693912
Epoch:0 Step

52it [00:07,  6.59it/s]


Epoch:0 Step:2976 Val_loss:0.678338
Epoch:0 Step:2984 Training_loss:0.688310 Training_loss_avg:0.685033
Epoch:0 Step:2992 Training_loss:0.693926 Training_loss_avg:0.683504
Epoch:0 Step:3000 Training_loss:0.752884 Training_loss_avg:0.685628
Epoch:0 Step:3008 Training_loss:0.759610 Training_loss_avg:0.686882
Epoch:0 Step:3016 Training_loss:0.528749 Training_loss_avg:0.684582
Epoch:0 Step:3024 Training_loss:0.774910 Training_loss_avg:0.685703
Epoch:0 Step:3032 Training_loss:0.645129 Training_loss_avg:0.684613
Epoch:0 Step:3040 Training_loss:0.684346 Training_loss_avg:0.685081
Epoch:0 Step:3048 Training_loss:0.623114 Training_loss_avg:0.683292
Epoch:0 Step:3056 Training_loss:0.702009 Training_loss_avg:0.683278
Epoch:0 Step:3064 Training_loss:0.698793 Training_loss_avg:0.682441
Epoch:0 Step:3072 Training_loss:0.623285 Training_loss_avg:0.680472
Epoch:0 Step:3080 Training_loss:0.684482 Training_loss_avg:0.679647
Epoch:0 Step:3088 Training_loss:0.703621 Training_loss_avg:0.680426
Epoch:0 Step

52it [00:07,  6.59it/s]


Epoch:0 Step:3224 Val_loss:0.679326
Epoch:0 Step:3232 Training_loss:0.708571 Training_loss_avg:0.685286
Epoch:0 Step:3240 Training_loss:0.673765 Training_loss_avg:0.683684
Epoch:0 Step:3248 Training_loss:0.697771 Training_loss_avg:0.685780
Epoch:0 Step:3256 Training_loss:0.657420 Training_loss_avg:0.684450
Epoch:0 Step:3264 Training_loss:0.703071 Training_loss_avg:0.686845
Epoch:0 Step:3272 Training_loss:0.725576 Training_loss_avg:0.685330
Epoch:0 Step:3280 Training_loss:0.615730 Training_loss_avg:0.682798
Epoch:0 Step:3288 Training_loss:0.708254 Training_loss_avg:0.684245
Epoch:0 Step:3296 Training_loss:0.729300 Training_loss_avg:0.686308
Epoch:0 Step:3304 Training_loss:0.694964 Training_loss_avg:0.686060
Epoch:0 Step:3312 Training_loss:0.678364 Training_loss_avg:0.687409
Epoch:0 Step:3320 Training_loss:0.690814 Training_loss_avg:0.686107
Epoch:0 Step:3328 Training_loss:0.724773 Training_loss_avg:0.686221
Epoch:1 Step:0 Training_loss:0.695418 Training_loss_avg:0.689381
Validating:


52it [00:07,  6.58it/s]


Epoch:1 Step:0 Val_loss:0.681155
Epoch:1 Step:8 Training_loss:0.687967 Training_loss_avg:0.689367
Epoch:1 Step:16 Training_loss:0.678884 Training_loss_avg:0.689412
Epoch:1 Step:24 Training_loss:0.748302 Training_loss_avg:0.689500
Epoch:1 Step:32 Training_loss:0.670335 Training_loss_avg:0.688728
Epoch:1 Step:40 Training_loss:0.749860 Training_loss_avg:0.692295
Epoch:1 Step:48 Training_loss:0.624356 Training_loss_avg:0.691016
Epoch:1 Step:56 Training_loss:0.734663 Training_loss_avg:0.691831
Epoch:1 Step:64 Training_loss:0.758480 Training_loss_avg:0.691943
Epoch:1 Step:72 Training_loss:0.738505 Training_loss_avg:0.691521
Epoch:1 Step:80 Training_loss:0.709118 Training_loss_avg:0.695128
Epoch:1 Step:88 Training_loss:0.736776 Training_loss_avg:0.694365
Epoch:1 Step:96 Training_loss:0.691822 Training_loss_avg:0.695299
Epoch:1 Step:104 Training_loss:0.732459 Training_loss_avg:0.696262
Epoch:1 Step:112 Training_loss:0.666284 Training_loss_avg:0.697125
Epoch:1 Step:120 Training_loss:0.655036 Tr

52it [00:07,  6.59it/s]


Epoch:1 Step:248 Val_loss:0.691615
Epoch:1 Step:256 Training_loss:0.681963 Training_loss_avg:0.695848
Epoch:1 Step:264 Training_loss:0.678381 Training_loss_avg:0.695057
Epoch:1 Step:272 Training_loss:0.696691 Training_loss_avg:0.694613
Epoch:1 Step:280 Training_loss:0.702607 Training_loss_avg:0.697295
Epoch:1 Step:288 Training_loss:0.641626 Training_loss_avg:0.696300
Epoch:1 Step:296 Training_loss:0.629746 Training_loss_avg:0.694723
Epoch:1 Step:304 Training_loss:0.603155 Training_loss_avg:0.693311
Epoch:1 Step:312 Training_loss:0.731072 Training_loss_avg:0.693977
Epoch:1 Step:320 Training_loss:0.696056 Training_loss_avg:0.694750
Epoch:1 Step:328 Training_loss:0.679660 Training_loss_avg:0.694282
Epoch:1 Step:336 Training_loss:0.688606 Training_loss_avg:0.693542
Epoch:1 Step:344 Training_loss:0.648998 Training_loss_avg:0.694208
Epoch:1 Step:352 Training_loss:0.758719 Training_loss_avg:0.695217
Epoch:1 Step:360 Training_loss:0.787727 Training_loss_avg:0.696385
Epoch:1 Step:368 Training_l

52it [00:07,  6.60it/s]


Epoch:1 Step:496 Val_loss:0.680678
Epoch:1 Step:504 Training_loss:0.691535 Training_loss_avg:0.695523
Epoch:1 Step:512 Training_loss:0.679531 Training_loss_avg:0.695787
Epoch:1 Step:520 Training_loss:0.697891 Training_loss_avg:0.696645
Epoch:1 Step:528 Training_loss:0.672388 Training_loss_avg:0.695482
Epoch:1 Step:536 Training_loss:0.699774 Training_loss_avg:0.696671
Epoch:1 Step:544 Training_loss:0.652251 Training_loss_avg:0.695174
Epoch:1 Step:552 Training_loss:0.612764 Training_loss_avg:0.693110
Epoch:1 Step:560 Training_loss:0.729507 Training_loss_avg:0.694029
Epoch:1 Step:568 Training_loss:0.722043 Training_loss_avg:0.693692
Epoch:1 Step:576 Training_loss:0.669258 Training_loss_avg:0.693954
Epoch:1 Step:584 Training_loss:0.670717 Training_loss_avg:0.692714
Epoch:1 Step:592 Training_loss:0.727217 Training_loss_avg:0.693969
Epoch:1 Step:600 Training_loss:0.678701 Training_loss_avg:0.693313
Epoch:1 Step:608 Training_loss:0.751471 Training_loss_avg:0.695519
Epoch:1 Step:616 Training_l

52it [00:07,  6.59it/s]


Epoch:1 Step:744 Val_loss:0.686392
Epoch:1 Step:752 Training_loss:0.764059 Training_loss_avg:0.700983
Epoch:1 Step:760 Training_loss:0.666779 Training_loss_avg:0.698564
Epoch:1 Step:768 Training_loss:0.687966 Training_loss_avg:0.699053
Epoch:1 Step:776 Training_loss:0.641924 Training_loss_avg:0.699309
Epoch:1 Step:784 Training_loss:0.679617 Training_loss_avg:0.698301
Epoch:1 Step:792 Training_loss:0.679409 Training_loss_avg:0.694126
Epoch:1 Step:800 Training_loss:0.725346 Training_loss_avg:0.693893
Epoch:1 Step:808 Training_loss:0.670696 Training_loss_avg:0.693536
Epoch:1 Step:816 Training_loss:0.651296 Training_loss_avg:0.693163
Epoch:1 Step:824 Training_loss:0.700037 Training_loss_avg:0.692450
Epoch:1 Step:832 Training_loss:0.735525 Training_loss_avg:0.693267
Epoch:1 Step:840 Training_loss:0.680286 Training_loss_avg:0.693187
Epoch:1 Step:848 Training_loss:0.707128 Training_loss_avg:0.692489
Epoch:1 Step:856 Training_loss:0.663986 Training_loss_avg:0.691023
Epoch:1 Step:864 Training_l

52it [00:07,  6.59it/s]


Epoch:1 Step:992 Val_loss:0.686519
Epoch:1 Step:1000 Training_loss:0.708040 Training_loss_avg:0.692706
Epoch:1 Step:1008 Training_loss:0.702985 Training_loss_avg:0.691736
Epoch:1 Step:1016 Training_loss:0.740507 Training_loss_avg:0.691428
Epoch:1 Step:1024 Training_loss:0.709398 Training_loss_avg:0.692980
Epoch:1 Step:1032 Training_loss:0.646676 Training_loss_avg:0.692994
Epoch:1 Step:1040 Training_loss:0.686020 Training_loss_avg:0.693481
Epoch:1 Step:1048 Training_loss:0.690667 Training_loss_avg:0.693624
Epoch:1 Step:1056 Training_loss:0.708751 Training_loss_avg:0.694481
Epoch:1 Step:1064 Training_loss:0.684665 Training_loss_avg:0.693156
Epoch:1 Step:1072 Training_loss:0.699420 Training_loss_avg:0.692446
Epoch:1 Step:1080 Training_loss:0.702886 Training_loss_avg:0.692186
Epoch:1 Step:1088 Training_loss:0.662376 Training_loss_avg:0.691795
Epoch:1 Step:1096 Training_loss:0.662063 Training_loss_avg:0.689878
Epoch:1 Step:1104 Training_loss:0.690452 Training_loss_avg:0.690687
Epoch:1 Step:

52it [00:07,  6.59it/s]


Epoch:1 Step:1240 Val_loss:0.683430
Epoch:1 Step:1248 Training_loss:0.713968 Training_loss_avg:0.687118
Epoch:1 Step:1256 Training_loss:0.681386 Training_loss_avg:0.687466
Epoch:1 Step:1264 Training_loss:0.693455 Training_loss_avg:0.687040
Epoch:1 Step:1272 Training_loss:0.656229 Training_loss_avg:0.686777
Epoch:1 Step:1280 Training_loss:0.647534 Training_loss_avg:0.686308
Epoch:1 Step:1288 Training_loss:0.725838 Training_loss_avg:0.687059
Epoch:1 Step:1296 Training_loss:0.674068 Training_loss_avg:0.687144
Epoch:1 Step:1304 Training_loss:0.739822 Training_loss_avg:0.688446
Epoch:1 Step:1312 Training_loss:0.741577 Training_loss_avg:0.687876
Epoch:1 Step:1320 Training_loss:0.672417 Training_loss_avg:0.688037
Epoch:1 Step:1328 Training_loss:0.708168 Training_loss_avg:0.688196
Epoch:1 Step:1336 Training_loss:0.645134 Training_loss_avg:0.686690
Epoch:1 Step:1344 Training_loss:0.666052 Training_loss_avg:0.687056
Epoch:1 Step:1352 Training_loss:0.702954 Training_loss_avg:0.685984
Epoch:1 Step

52it [00:07,  6.60it/s]


Epoch:1 Step:1488 Val_loss:0.677181
Epoch:1 Step:1496 Training_loss:0.650632 Training_loss_avg:0.685450
Epoch:1 Step:1504 Training_loss:0.708017 Training_loss_avg:0.685802
Epoch:1 Step:1512 Training_loss:0.689657 Training_loss_avg:0.685132
Epoch:1 Step:1520 Training_loss:0.734147 Training_loss_avg:0.686583
Epoch:1 Step:1528 Training_loss:0.649097 Training_loss_avg:0.686085
Epoch:1 Step:1536 Training_loss:0.690122 Training_loss_avg:0.686386
Epoch:1 Step:1544 Training_loss:0.626129 Training_loss_avg:0.685223
Epoch:1 Step:1552 Training_loss:0.710337 Training_loss_avg:0.684739
Epoch:1 Step:1560 Training_loss:0.644026 Training_loss_avg:0.684642
Epoch:1 Step:1568 Training_loss:0.730682 Training_loss_avg:0.685267
Epoch:1 Step:1576 Training_loss:0.667475 Training_loss_avg:0.685752
Epoch:1 Step:1584 Training_loss:0.806556 Training_loss_avg:0.689737
Epoch:1 Step:1592 Training_loss:0.683560 Training_loss_avg:0.689315
Epoch:1 Step:1600 Training_loss:0.597904 Training_loss_avg:0.686617
Epoch:1 Step

52it [00:07,  6.59it/s]


Epoch:1 Step:1736 Val_loss:0.676679
Epoch:1 Step:1744 Training_loss:0.693924 Training_loss_avg:0.683693
Epoch:1 Step:1752 Training_loss:0.610754 Training_loss_avg:0.681849
Epoch:1 Step:1760 Training_loss:0.668342 Training_loss_avg:0.681079
Epoch:1 Step:1768 Training_loss:0.620385 Training_loss_avg:0.679544
Epoch:1 Step:1776 Training_loss:0.795823 Training_loss_avg:0.679530
Epoch:1 Step:1784 Training_loss:0.628429 Training_loss_avg:0.678506
Epoch:1 Step:1792 Training_loss:0.674282 Training_loss_avg:0.678111
Epoch:1 Step:1800 Training_loss:0.615436 Training_loss_avg:0.677655
Epoch:1 Step:1808 Training_loss:0.728249 Training_loss_avg:0.678622
Epoch:1 Step:1816 Training_loss:0.699681 Training_loss_avg:0.679193
Epoch:1 Step:1824 Training_loss:0.689483 Training_loss_avg:0.680790
Epoch:1 Step:1832 Training_loss:0.704018 Training_loss_avg:0.678295
Epoch:1 Step:1840 Training_loss:0.653462 Training_loss_avg:0.677565
Epoch:1 Step:1848 Training_loss:0.750379 Training_loss_avg:0.679866
Epoch:1 Step

52it [00:07,  6.60it/s]


Epoch:1 Step:1984 Val_loss:0.681296
Epoch:1 Step:1992 Training_loss:0.675213 Training_loss_avg:0.682706
Epoch:1 Step:2000 Training_loss:0.686633 Training_loss_avg:0.684481
Epoch:1 Step:2008 Training_loss:0.686160 Training_loss_avg:0.686440
Epoch:1 Step:2016 Training_loss:0.660914 Training_loss_avg:0.685811
Epoch:1 Step:2024 Training_loss:0.638828 Training_loss_avg:0.686107
Epoch:1 Step:2032 Training_loss:0.725620 Training_loss_avg:0.684633
Epoch:1 Step:2040 Training_loss:0.630011 Training_loss_avg:0.683961
Epoch:1 Step:2048 Training_loss:0.702096 Training_loss_avg:0.683026
Epoch:1 Step:2056 Training_loss:0.644155 Training_loss_avg:0.683722
Epoch:1 Step:2064 Training_loss:0.689583 Training_loss_avg:0.685361
Epoch:1 Step:2072 Training_loss:0.707959 Training_loss_avg:0.686925
Epoch:1 Step:2080 Training_loss:0.639590 Training_loss_avg:0.684891
Epoch:1 Step:2088 Training_loss:0.708544 Training_loss_avg:0.685982
Epoch:1 Step:2096 Training_loss:0.640715 Training_loss_avg:0.686077
Epoch:1 Step

52it [00:07,  6.59it/s]


Epoch:1 Step:2232 Val_loss:0.676728
Epoch:1 Step:2240 Training_loss:0.731240 Training_loss_avg:0.691634
Epoch:1 Step:2248 Training_loss:0.752126 Training_loss_avg:0.691669
Epoch:1 Step:2256 Training_loss:0.632776 Training_loss_avg:0.691859
Epoch:1 Step:2264 Training_loss:0.690554 Training_loss_avg:0.691060
Epoch:1 Step:2272 Training_loss:0.707261 Training_loss_avg:0.690827
Epoch:1 Step:2280 Training_loss:0.678586 Training_loss_avg:0.688867
Epoch:1 Step:2288 Training_loss:0.721442 Training_loss_avg:0.689724
Epoch:1 Step:2296 Training_loss:0.621361 Training_loss_avg:0.686744
Epoch:1 Step:2304 Training_loss:0.746406 Training_loss_avg:0.689413
Epoch:1 Step:2312 Training_loss:0.595957 Training_loss_avg:0.686622
Epoch:1 Step:2320 Training_loss:0.677776 Training_loss_avg:0.685796
Epoch:1 Step:2328 Training_loss:0.693845 Training_loss_avg:0.685380
Epoch:1 Step:2336 Training_loss:0.604956 Training_loss_avg:0.684363
Epoch:1 Step:2344 Training_loss:0.724541 Training_loss_avg:0.684548
Epoch:1 Step

52it [00:07,  6.59it/s]


Epoch:1 Step:2480 Val_loss:0.675149
Epoch:1 Step:2488 Training_loss:0.675496 Training_loss_avg:0.683637
Epoch:1 Step:2496 Training_loss:0.645362 Training_loss_avg:0.683730
Epoch:1 Step:2504 Training_loss:0.778020 Training_loss_avg:0.687973
Epoch:1 Step:2512 Training_loss:0.712068 Training_loss_avg:0.687317
Epoch:1 Step:2520 Training_loss:0.598632 Training_loss_avg:0.684196
Epoch:1 Step:2528 Training_loss:0.679818 Training_loss_avg:0.683297
Epoch:1 Step:2536 Training_loss:0.623083 Training_loss_avg:0.682562
Epoch:1 Step:2544 Training_loss:0.652954 Training_loss_avg:0.682384
Epoch:1 Step:2552 Training_loss:0.869078 Training_loss_avg:0.686871
Epoch:1 Step:2560 Training_loss:0.691141 Training_loss_avg:0.687192
Epoch:1 Step:2568 Training_loss:0.614611 Training_loss_avg:0.684761
Epoch:1 Step:2576 Training_loss:0.723848 Training_loss_avg:0.683755
Epoch:1 Step:2584 Training_loss:0.732845 Training_loss_avg:0.685439
Epoch:1 Step:2592 Training_loss:0.771901 Training_loss_avg:0.687263
Epoch:1 Step

52it [00:07,  6.59it/s]


Epoch:1 Step:2728 Val_loss:0.683696
Epoch:1 Step:2736 Training_loss:0.691721 Training_loss_avg:0.686532
Epoch:1 Step:2744 Training_loss:0.694500 Training_loss_avg:0.685931
Epoch:1 Step:2752 Training_loss:0.692584 Training_loss_avg:0.686271
Epoch:1 Step:2760 Training_loss:0.752042 Training_loss_avg:0.688102
Epoch:1 Step:2768 Training_loss:0.665720 Training_loss_avg:0.689031
Epoch:1 Step:2776 Training_loss:0.678577 Training_loss_avg:0.689853
Epoch:1 Step:2784 Training_loss:0.693895 Training_loss_avg:0.690480
Epoch:1 Step:2792 Training_loss:0.648204 Training_loss_avg:0.689884
Epoch:1 Step:2800 Training_loss:0.686736 Training_loss_avg:0.690977
Epoch:1 Step:2808 Training_loss:0.696230 Training_loss_avg:0.690969
Epoch:1 Step:2816 Training_loss:0.764737 Training_loss_avg:0.694495
Epoch:1 Step:2824 Training_loss:0.679741 Training_loss_avg:0.693168
Epoch:1 Step:2832 Training_loss:0.670741 Training_loss_avg:0.693356
Epoch:1 Step:2840 Training_loss:0.627532 Training_loss_avg:0.694892
Epoch:1 Step

52it [00:07,  6.59it/s]


Epoch:1 Step:2976 Val_loss:0.677167
Epoch:1 Step:2984 Training_loss:0.696449 Training_loss_avg:0.696059
Epoch:1 Step:2992 Training_loss:0.761324 Training_loss_avg:0.695847
Epoch:1 Step:3000 Training_loss:0.640028 Training_loss_avg:0.694258
Epoch:1 Step:3008 Training_loss:0.672920 Training_loss_avg:0.693674
Epoch:1 Step:3016 Training_loss:0.654913 Training_loss_avg:0.691723
Epoch:1 Step:3024 Training_loss:0.671147 Training_loss_avg:0.692134
Epoch:1 Step:3032 Training_loss:0.709443 Training_loss_avg:0.691747
Epoch:1 Step:3040 Training_loss:0.621410 Training_loss_avg:0.690682
Epoch:1 Step:3048 Training_loss:0.746864 Training_loss_avg:0.692429
Epoch:1 Step:3056 Training_loss:0.734108 Training_loss_avg:0.695032
Epoch:1 Step:3064 Training_loss:0.701482 Training_loss_avg:0.695157
Epoch:1 Step:3072 Training_loss:0.674536 Training_loss_avg:0.695117
Epoch:1 Step:3080 Training_loss:0.654324 Training_loss_avg:0.694488
Epoch:1 Step:3088 Training_loss:0.690014 Training_loss_avg:0.693472
Epoch:1 Step

52it [00:07,  6.60it/s]


Epoch:1 Step:3224 Val_loss:0.674106
Epoch:1 Step:3232 Training_loss:0.703342 Training_loss_avg:0.687972
Epoch:1 Step:3240 Training_loss:0.640191 Training_loss_avg:0.688225
Epoch:1 Step:3248 Training_loss:0.637152 Training_loss_avg:0.685462
Epoch:1 Step:3256 Training_loss:0.650891 Training_loss_avg:0.684036
Epoch:1 Step:3264 Training_loss:0.755924 Training_loss_avg:0.686086
Epoch:1 Step:3272 Training_loss:0.795572 Training_loss_avg:0.686351
Epoch:1 Step:3280 Training_loss:0.718325 Training_loss_avg:0.688048
Epoch:1 Step:3288 Training_loss:0.637418 Training_loss_avg:0.685691
Epoch:1 Step:3296 Training_loss:0.677361 Training_loss_avg:0.684912
Epoch:1 Step:3304 Training_loss:0.607161 Training_loss_avg:0.683835
Epoch:1 Step:3312 Training_loss:0.653853 Training_loss_avg:0.684295
Epoch:1 Step:3320 Training_loss:0.747063 Training_loss_avg:0.683616
Epoch:1 Step:3328 Training_loss:0.764630 Training_loss_avg:0.685978
Epoch:2 Step:0 Training_loss:0.752465 Training_loss_avg:0.686513
Validating:


52it [00:07,  6.59it/s]


Epoch:2 Step:0 Val_loss:0.673939
Epoch:2 Step:8 Training_loss:0.758692 Training_loss_avg:0.686611
Epoch:2 Step:16 Training_loss:0.715818 Training_loss_avg:0.688607
Epoch:2 Step:24 Training_loss:0.748187 Training_loss_avg:0.689213
Epoch:2 Step:32 Training_loss:0.720378 Training_loss_avg:0.689113
Epoch:2 Step:40 Training_loss:0.702246 Training_loss_avg:0.689619
Epoch:2 Step:48 Training_loss:0.715010 Training_loss_avg:0.689990
Epoch:2 Step:56 Training_loss:0.741284 Training_loss_avg:0.689589
Epoch:2 Step:64 Training_loss:0.681908 Training_loss_avg:0.690427
Epoch:2 Step:72 Training_loss:0.711638 Training_loss_avg:0.691201
Epoch:2 Step:80 Training_loss:0.699836 Training_loss_avg:0.692100
Epoch:2 Step:88 Training_loss:0.632000 Training_loss_avg:0.691317
Epoch:2 Step:96 Training_loss:0.700575 Training_loss_avg:0.691139
Epoch:2 Step:104 Training_loss:0.679890 Training_loss_avg:0.692309
Epoch:2 Step:112 Training_loss:0.682700 Training_loss_avg:0.691026
Epoch:2 Step:120 Training_loss:0.686855 Tr

52it [00:07,  6.59it/s]


Epoch:2 Step:248 Val_loss:0.682165
Epoch:2 Step:256 Training_loss:0.594008 Training_loss_avg:0.692283
Epoch:2 Step:264 Training_loss:0.702241 Training_loss_avg:0.692394
Epoch:2 Step:272 Training_loss:0.691258 Training_loss_avg:0.692096
Epoch:2 Step:280 Training_loss:0.662219 Training_loss_avg:0.692043
Epoch:2 Step:288 Training_loss:0.658060 Training_loss_avg:0.692407
Epoch:2 Step:296 Training_loss:0.652260 Training_loss_avg:0.691386
Epoch:2 Step:304 Training_loss:0.645633 Training_loss_avg:0.691495
Epoch:2 Step:312 Training_loss:0.694924 Training_loss_avg:0.692650
Epoch:2 Step:320 Training_loss:0.686843 Training_loss_avg:0.693369
Epoch:2 Step:328 Training_loss:0.681524 Training_loss_avg:0.691881
Epoch:2 Step:336 Training_loss:0.748909 Training_loss_avg:0.690948
Epoch:2 Step:344 Training_loss:0.764072 Training_loss_avg:0.691863
Epoch:2 Step:352 Training_loss:0.652663 Training_loss_avg:0.692168
Epoch:2 Step:360 Training_loss:0.708384 Training_loss_avg:0.692788
Epoch:2 Step:368 Training_l

52it [00:07,  6.58it/s]


Epoch:2 Step:496 Val_loss:0.675484
Epoch:2 Step:504 Training_loss:0.732262 Training_loss_avg:0.689081
Epoch:2 Step:512 Training_loss:0.663230 Training_loss_avg:0.688691
Epoch:2 Step:520 Training_loss:0.673711 Training_loss_avg:0.688429
Epoch:2 Step:528 Training_loss:0.633765 Training_loss_avg:0.686980
Epoch:2 Step:536 Training_loss:0.682896 Training_loss_avg:0.687469
Epoch:2 Step:544 Training_loss:0.634431 Training_loss_avg:0.686444
Epoch:2 Step:552 Training_loss:0.737040 Training_loss_avg:0.687804
Epoch:2 Step:560 Training_loss:0.708470 Training_loss_avg:0.688832
Epoch:2 Step:568 Training_loss:0.619914 Training_loss_avg:0.687131
Epoch:2 Step:576 Training_loss:0.708024 Training_loss_avg:0.686927
Epoch:2 Step:584 Training_loss:0.675788 Training_loss_avg:0.686444
Epoch:2 Step:592 Training_loss:0.764226 Training_loss_avg:0.687673
Epoch:2 Step:600 Training_loss:0.644741 Training_loss_avg:0.686804
Epoch:2 Step:608 Training_loss:0.718060 Training_loss_avg:0.687240
Epoch:2 Step:616 Training_l

52it [00:07,  6.60it/s]


Epoch:2 Step:744 Val_loss:0.680724
Epoch:2 Step:752 Training_loss:0.667465 Training_loss_avg:0.689516
Epoch:2 Step:760 Training_loss:0.706742 Training_loss_avg:0.689484
Epoch:2 Step:768 Training_loss:0.675372 Training_loss_avg:0.689978
Epoch:2 Step:776 Training_loss:0.701338 Training_loss_avg:0.689188
Epoch:2 Step:784 Training_loss:0.681192 Training_loss_avg:0.689282
Epoch:2 Step:792 Training_loss:0.718649 Training_loss_avg:0.688001
Epoch:2 Step:800 Training_loss:0.722997 Training_loss_avg:0.688340
Epoch:2 Step:808 Training_loss:0.678476 Training_loss_avg:0.687291
Epoch:2 Step:816 Training_loss:0.655672 Training_loss_avg:0.685588
Epoch:2 Step:824 Training_loss:0.702111 Training_loss_avg:0.686667
Epoch:2 Step:832 Training_loss:0.666948 Training_loss_avg:0.685610
Epoch:2 Step:840 Training_loss:0.703257 Training_loss_avg:0.686306
Epoch:2 Step:848 Training_loss:0.746187 Training_loss_avg:0.687416
Epoch:2 Step:856 Training_loss:0.627134 Training_loss_avg:0.686223
Epoch:2 Step:864 Training_l

52it [00:07,  6.59it/s]


Epoch:2 Step:992 Val_loss:0.670007
Epoch:2 Step:1000 Training_loss:0.577636 Training_loss_avg:0.683007
Epoch:2 Step:1008 Training_loss:0.646233 Training_loss_avg:0.681570
Epoch:2 Step:1016 Training_loss:0.737452 Training_loss_avg:0.683591
Epoch:2 Step:1024 Training_loss:0.614262 Training_loss_avg:0.681647
Epoch:2 Step:1032 Training_loss:0.901789 Training_loss_avg:0.685790
Epoch:2 Step:1040 Training_loss:0.711712 Training_loss_avg:0.685519
Epoch:2 Step:1048 Training_loss:0.622437 Training_loss_avg:0.684620
Epoch:2 Step:1056 Training_loss:0.717117 Training_loss_avg:0.684703
Epoch:2 Step:1064 Training_loss:0.640261 Training_loss_avg:0.683117
Epoch:2 Step:1072 Training_loss:0.641738 Training_loss_avg:0.682328
Epoch:2 Step:1080 Training_loss:0.710900 Training_loss_avg:0.683127
Epoch:2 Step:1088 Training_loss:0.708955 Training_loss_avg:0.684078
Epoch:2 Step:1096 Training_loss:0.703884 Training_loss_avg:0.684298
Epoch:2 Step:1104 Training_loss:0.573084 Training_loss_avg:0.682447
Epoch:2 Step:

52it [00:07,  6.59it/s]


Epoch:2 Step:1240 Val_loss:0.669380
Epoch:2 Step:1248 Training_loss:0.663361 Training_loss_avg:0.673380
Epoch:2 Step:1256 Training_loss:0.766376 Training_loss_avg:0.676165
Epoch:2 Step:1264 Training_loss:0.641084 Training_loss_avg:0.675936
Epoch:2 Step:1272 Training_loss:0.614012 Training_loss_avg:0.675036
Epoch:2 Step:1280 Training_loss:0.674825 Training_loss_avg:0.674822
Epoch:2 Step:1288 Training_loss:0.628582 Training_loss_avg:0.673250
Epoch:2 Step:1296 Training_loss:0.697218 Training_loss_avg:0.674484
Epoch:2 Step:1304 Training_loss:0.625262 Training_loss_avg:0.672454
Epoch:2 Step:1312 Training_loss:0.743495 Training_loss_avg:0.673038
Epoch:2 Step:1320 Training_loss:0.700802 Training_loss_avg:0.671733
Epoch:2 Step:1328 Training_loss:0.744280 Training_loss_avg:0.673153
Epoch:2 Step:1336 Training_loss:0.670044 Training_loss_avg:0.672810
Epoch:2 Step:1344 Training_loss:0.714991 Training_loss_avg:0.675145
Epoch:2 Step:1352 Training_loss:0.649251 Training_loss_avg:0.674286
Epoch:2 Step

52it [00:07,  6.59it/s]


Epoch:2 Step:1488 Val_loss:0.671594
Epoch:2 Step:1496 Training_loss:0.785966 Training_loss_avg:0.665861
Epoch:2 Step:1504 Training_loss:0.741453 Training_loss_avg:0.669228
Epoch:2 Step:1512 Training_loss:0.716079 Training_loss_avg:0.671218
Epoch:2 Step:1520 Training_loss:0.513365 Training_loss_avg:0.667074
Epoch:2 Step:1528 Training_loss:0.587868 Training_loss_avg:0.663469
Epoch:2 Step:1536 Training_loss:0.732746 Training_loss_avg:0.663954
Epoch:2 Step:1544 Training_loss:0.634559 Training_loss_avg:0.664203
Epoch:2 Step:1552 Training_loss:0.517079 Training_loss_avg:0.662192
Epoch:2 Step:1560 Training_loss:0.608161 Training_loss_avg:0.662474
Epoch:2 Step:1568 Training_loss:0.743615 Training_loss_avg:0.663894
Epoch:2 Step:1576 Training_loss:0.605626 Training_loss_avg:0.661911
Epoch:2 Step:1584 Training_loss:0.711598 Training_loss_avg:0.661543
Epoch:2 Step:1592 Training_loss:0.790122 Training_loss_avg:0.663847
Epoch:2 Step:1600 Training_loss:0.668295 Training_loss_avg:0.664860
Epoch:2 Step

52it [00:07,  6.60it/s]


Epoch:2 Step:1736 Val_loss:0.669622
Epoch:2 Step:1744 Training_loss:0.720815 Training_loss_avg:0.664080
Epoch:2 Step:1752 Training_loss:0.700693 Training_loss_avg:0.665109
Epoch:2 Step:1760 Training_loss:0.717353 Training_loss_avg:0.664497
Epoch:2 Step:1768 Training_loss:0.684025 Training_loss_avg:0.665243
Epoch:2 Step:1776 Training_loss:0.731264 Training_loss_avg:0.667146
Epoch:2 Step:1784 Training_loss:0.708288 Training_loss_avg:0.668402
Epoch:2 Step:1792 Training_loss:0.697730 Training_loss_avg:0.669053
Epoch:2 Step:1800 Training_loss:0.704243 Training_loss_avg:0.670994
Epoch:2 Step:1808 Training_loss:0.670967 Training_loss_avg:0.671887
Epoch:2 Step:1816 Training_loss:0.662720 Training_loss_avg:0.670489
Epoch:2 Step:1824 Training_loss:0.649822 Training_loss_avg:0.672355
Epoch:2 Step:1832 Training_loss:0.683071 Training_loss_avg:0.671960
Epoch:2 Step:1840 Training_loss:0.611089 Training_loss_avg:0.671749
Epoch:2 Step:1848 Training_loss:0.674558 Training_loss_avg:0.671602
Epoch:2 Step

52it [00:07,  6.59it/s]


Epoch:2 Step:1984 Val_loss:0.668707
Epoch:2 Step:1992 Training_loss:0.692110 Training_loss_avg:0.679097
Epoch:2 Step:2000 Training_loss:0.765248 Training_loss_avg:0.681036
Epoch:2 Step:2008 Training_loss:0.643079 Training_loss_avg:0.682281
Epoch:2 Step:2016 Training_loss:0.825919 Training_loss_avg:0.681639
Epoch:2 Step:2024 Training_loss:0.689221 Training_loss_avg:0.684995
Epoch:2 Step:2032 Training_loss:0.701607 Training_loss_avg:0.686315
Epoch:2 Step:2040 Training_loss:0.842847 Training_loss_avg:0.689918
Epoch:2 Step:2048 Training_loss:0.783093 Training_loss_avg:0.691409
Epoch:2 Step:2056 Training_loss:0.661721 Training_loss_avg:0.689605
Epoch:2 Step:2064 Training_loss:0.622177 Training_loss_avg:0.687091
Epoch:2 Step:2072 Training_loss:0.600031 Training_loss_avg:0.685426
Epoch:2 Step:2080 Training_loss:0.661337 Training_loss_avg:0.685256
Epoch:2 Step:2088 Training_loss:0.676837 Training_loss_avg:0.684864
Epoch:2 Step:2096 Training_loss:0.713136 Training_loss_avg:0.684453
Epoch:2 Step

52it [00:07,  6.60it/s]


Epoch:2 Step:2232 Val_loss:0.662603
Epoch:2 Step:2240 Training_loss:0.633215 Training_loss_avg:0.691566
Epoch:2 Step:2248 Training_loss:0.763581 Training_loss_avg:0.693347
Epoch:2 Step:2256 Training_loss:0.645250 Training_loss_avg:0.690929
Epoch:2 Step:2264 Training_loss:0.685644 Training_loss_avg:0.691611
Epoch:2 Step:2272 Training_loss:0.674027 Training_loss_avg:0.690788
Epoch:2 Step:2280 Training_loss:0.691463 Training_loss_avg:0.691100
Epoch:2 Step:2288 Training_loss:0.678671 Training_loss_avg:0.691176
Epoch:2 Step:2296 Training_loss:0.654840 Training_loss_avg:0.688833
Epoch:2 Step:2304 Training_loss:0.809114 Training_loss_avg:0.691607
Epoch:2 Step:2312 Training_loss:0.652122 Training_loss_avg:0.691153
Epoch:2 Step:2320 Training_loss:0.700882 Training_loss_avg:0.691130
Epoch:2 Step:2328 Training_loss:0.636151 Training_loss_avg:0.690843
Epoch:2 Step:2336 Training_loss:0.687405 Training_loss_avg:0.691449
Epoch:2 Step:2344 Training_loss:0.728456 Training_loss_avg:0.693327
Epoch:2 Step

52it [00:07,  6.59it/s]


Epoch:2 Step:2480 Val_loss:0.665460
Epoch:2 Step:2488 Training_loss:0.709064 Training_loss_avg:0.690158
Epoch:2 Step:2496 Training_loss:0.743345 Training_loss_avg:0.690762
Epoch:2 Step:2504 Training_loss:0.620292 Training_loss_avg:0.689191
Epoch:2 Step:2512 Training_loss:0.711526 Training_loss_avg:0.690798
Epoch:2 Step:2520 Training_loss:0.711802 Training_loss_avg:0.690908
Epoch:2 Step:2528 Training_loss:0.670236 Training_loss_avg:0.692165
Epoch:2 Step:2536 Training_loss:0.791254 Training_loss_avg:0.694711
Epoch:2 Step:2544 Training_loss:0.687072 Training_loss_avg:0.694219
Epoch:2 Step:2552 Training_loss:0.694014 Training_loss_avg:0.694196
Epoch:2 Step:2560 Training_loss:0.699996 Training_loss_avg:0.695377
Epoch:2 Step:2568 Training_loss:0.661447 Training_loss_avg:0.695127
Epoch:2 Step:2576 Training_loss:0.675201 Training_loss_avg:0.694446
Epoch:2 Step:2584 Training_loss:0.677470 Training_loss_avg:0.694830
Epoch:2 Step:2592 Training_loss:0.615821 Training_loss_avg:0.693491
Epoch:2 Step

52it [00:07,  6.60it/s]


Epoch:2 Step:2728 Val_loss:0.660101
Epoch:2 Step:2736 Training_loss:0.760139 Training_loss_avg:0.682043
Epoch:2 Step:2744 Training_loss:0.633282 Training_loss_avg:0.680140
Epoch:2 Step:2752 Training_loss:0.659743 Training_loss_avg:0.680407
Epoch:2 Step:2760 Training_loss:0.795012 Training_loss_avg:0.682688
Epoch:2 Step:2768 Training_loss:0.659105 Training_loss_avg:0.682577
Epoch:2 Step:2776 Training_loss:0.670408 Training_loss_avg:0.682813
Epoch:2 Step:2784 Training_loss:0.774326 Training_loss_avg:0.685763
Epoch:2 Step:2792 Training_loss:0.682168 Training_loss_avg:0.685205
Epoch:2 Step:2800 Training_loss:0.608186 Training_loss_avg:0.683796
Epoch:2 Step:2808 Training_loss:0.711487 Training_loss_avg:0.683806
Epoch:2 Step:2816 Training_loss:0.735505 Training_loss_avg:0.684170
Epoch:2 Step:2824 Training_loss:0.628069 Training_loss_avg:0.683216
Epoch:2 Step:2832 Training_loss:0.680963 Training_loss_avg:0.681409
Epoch:2 Step:2840 Training_loss:0.663983 Training_loss_avg:0.681613
Epoch:2 Step

52it [00:07,  6.60it/s]


Epoch:2 Step:2976 Val_loss:0.663543
Epoch:2 Step:2984 Training_loss:0.708343 Training_loss_avg:0.672192
Epoch:2 Step:2992 Training_loss:0.662286 Training_loss_avg:0.673121
Epoch:2 Step:3000 Training_loss:0.775518 Training_loss_avg:0.673829
Epoch:2 Step:3008 Training_loss:0.643448 Training_loss_avg:0.673789
Epoch:2 Step:3016 Training_loss:0.721239 Training_loss_avg:0.674885
Epoch:2 Step:3024 Training_loss:0.684636 Training_loss_avg:0.674107
Epoch:2 Step:3032 Training_loss:0.634647 Training_loss_avg:0.674254
Epoch:2 Step:3040 Training_loss:0.632275 Training_loss_avg:0.675057
Epoch:2 Step:3048 Training_loss:0.721498 Training_loss_avg:0.675788
Epoch:2 Step:3056 Training_loss:0.781443 Training_loss_avg:0.678119
Epoch:2 Step:3064 Training_loss:0.557903 Training_loss_avg:0.675533
Epoch:2 Step:3072 Training_loss:0.611501 Training_loss_avg:0.674894
Epoch:2 Step:3080 Training_loss:0.631355 Training_loss_avg:0.671813
Epoch:2 Step:3088 Training_loss:0.680548 Training_loss_avg:0.671364
Epoch:2 Step

52it [00:07,  6.59it/s]


Epoch:2 Step:3224 Val_loss:0.659551
Epoch:2 Step:3232 Training_loss:0.650588 Training_loss_avg:0.673633
Epoch:2 Step:3240 Training_loss:0.620807 Training_loss_avg:0.672770
Epoch:2 Step:3248 Training_loss:0.644618 Training_loss_avg:0.673193
Epoch:2 Step:3256 Training_loss:0.604855 Training_loss_avg:0.669509
Epoch:2 Step:3264 Training_loss:0.642632 Training_loss_avg:0.668858
Epoch:2 Step:3272 Training_loss:0.645282 Training_loss_avg:0.669006
Epoch:2 Step:3280 Training_loss:0.626495 Training_loss_avg:0.667587
Epoch:2 Step:3288 Training_loss:0.804967 Training_loss_avg:0.670793
Epoch:2 Step:3296 Training_loss:0.671676 Training_loss_avg:0.670304
Epoch:2 Step:3304 Training_loss:0.612596 Training_loss_avg:0.668939
Epoch:2 Step:3312 Training_loss:0.529544 Training_loss_avg:0.666347
Epoch:2 Step:3320 Training_loss:0.708220 Training_loss_avg:0.667708
Epoch:2 Step:3328 Training_loss:0.960777 Training_loss_avg:0.675174
Epoch:3 Step:0 Training_loss:0.648579 Training_loss_avg:0.673366
Validating:


52it [00:07,  6.59it/s]


Epoch:3 Step:0 Val_loss:0.655046
Epoch:3 Step:8 Training_loss:0.627556 Training_loss_avg:0.673781
Epoch:3 Step:16 Training_loss:0.610005 Training_loss_avg:0.672120
Epoch:3 Step:24 Training_loss:0.583882 Training_loss_avg:0.670475
Epoch:3 Step:32 Training_loss:0.602419 Training_loss_avg:0.668949
Epoch:3 Step:40 Training_loss:0.631842 Training_loss_avg:0.670425
Epoch:3 Step:48 Training_loss:0.622555 Training_loss_avg:0.668709
Epoch:3 Step:56 Training_loss:0.614946 Training_loss_avg:0.667762
Epoch:3 Step:64 Training_loss:0.674931 Training_loss_avg:0.665751
Epoch:3 Step:72 Training_loss:0.670285 Training_loss_avg:0.666287
Epoch:3 Step:80 Training_loss:0.632725 Training_loss_avg:0.664517
Epoch:3 Step:88 Training_loss:0.609155 Training_loss_avg:0.663007
Epoch:3 Step:96 Training_loss:0.572343 Training_loss_avg:0.661761
Epoch:3 Step:104 Training_loss:0.677273 Training_loss_avg:0.662661
Epoch:3 Step:112 Training_loss:0.664825 Training_loss_avg:0.661528
Epoch:3 Step:120 Training_loss:0.671359 Tr

52it [00:07,  6.60it/s]


Epoch:3 Step:248 Val_loss:0.650122
Epoch:3 Step:256 Training_loss:0.638952 Training_loss_avg:0.666821
Epoch:3 Step:264 Training_loss:0.683426 Training_loss_avg:0.666434
Epoch:3 Step:272 Training_loss:0.593055 Training_loss_avg:0.664742
Epoch:3 Step:280 Training_loss:0.829810 Training_loss_avg:0.667797
Epoch:3 Step:288 Training_loss:0.652620 Training_loss_avg:0.664437
Epoch:3 Step:296 Training_loss:0.527022 Training_loss_avg:0.661965
Epoch:3 Step:304 Training_loss:0.751814 Training_loss_avg:0.664585
Epoch:3 Step:312 Training_loss:0.784737 Training_loss_avg:0.667388
Epoch:3 Step:320 Training_loss:0.789853 Training_loss_avg:0.671088
Epoch:3 Step:328 Training_loss:0.626847 Training_loss_avg:0.670772
Epoch:3 Step:336 Training_loss:0.615694 Training_loss_avg:0.670180
Epoch:3 Step:344 Training_loss:0.690618 Training_loss_avg:0.671463
Epoch:3 Step:352 Training_loss:0.610794 Training_loss_avg:0.667579
Epoch:3 Step:360 Training_loss:0.684633 Training_loss_avg:0.667839
Epoch:3 Step:368 Training_l

52it [00:07,  6.60it/s]


Epoch:3 Step:496 Val_loss:0.665911
Epoch:3 Step:504 Training_loss:0.482099 Training_loss_avg:0.662980
Epoch:3 Step:512 Training_loss:0.623200 Training_loss_avg:0.662148
Epoch:3 Step:520 Training_loss:0.663694 Training_loss_avg:0.661995
Epoch:3 Step:528 Training_loss:0.640623 Training_loss_avg:0.660268
Epoch:3 Step:536 Training_loss:0.353484 Training_loss_avg:0.653576
Epoch:3 Step:544 Training_loss:0.833146 Training_loss_avg:0.655725
Epoch:3 Step:552 Training_loss:0.675101 Training_loss_avg:0.656733
Epoch:3 Step:560 Training_loss:0.542189 Training_loss_avg:0.654091
Epoch:3 Step:568 Training_loss:0.528626 Training_loss_avg:0.648800
Epoch:3 Step:576 Training_loss:0.590269 Training_loss_avg:0.645908
Epoch:3 Step:584 Training_loss:0.467876 Training_loss_avg:0.642322
Epoch:3 Step:592 Training_loss:0.885916 Training_loss_avg:0.645685
Epoch:3 Step:600 Training_loss:0.461696 Training_loss_avg:0.642627
Epoch:3 Step:608 Training_loss:0.858477 Training_loss_avg:0.645585
Epoch:3 Step:616 Training_l

52it [00:07,  6.60it/s]


Epoch:3 Step:744 Val_loss:0.652167
Epoch:3 Step:752 Training_loss:0.627053 Training_loss_avg:0.650761
Epoch:3 Step:760 Training_loss:0.684946 Training_loss_avg:0.650767
Epoch:3 Step:768 Training_loss:0.648851 Training_loss_avg:0.651061
Epoch:3 Step:776 Training_loss:0.708345 Training_loss_avg:0.650777
Epoch:3 Step:784 Training_loss:0.747837 Training_loss_avg:0.652443
Epoch:3 Step:792 Training_loss:0.679297 Training_loss_avg:0.654116
Epoch:3 Step:800 Training_loss:0.600485 Training_loss_avg:0.654118
Epoch:3 Step:808 Training_loss:0.858765 Training_loss_avg:0.658423
Epoch:3 Step:816 Training_loss:0.694217 Training_loss_avg:0.659455
Epoch:3 Step:824 Training_loss:0.706861 Training_loss_avg:0.661511
Epoch:3 Step:832 Training_loss:0.628703 Training_loss_avg:0.659340
Epoch:3 Step:840 Training_loss:0.648332 Training_loss_avg:0.658753
Epoch:3 Step:848 Training_loss:0.715767 Training_loss_avg:0.661483
Epoch:3 Step:856 Training_loss:0.797502 Training_loss_avg:0.665235
Epoch:3 Step:864 Training_l

52it [00:07,  6.59it/s]


Epoch:3 Step:992 Val_loss:0.655301
Epoch:3 Step:1000 Training_loss:0.715632 Training_loss_avg:0.688245
Epoch:3 Step:1008 Training_loss:0.642723 Training_loss_avg:0.683930
Epoch:3 Step:1016 Training_loss:0.552590 Training_loss_avg:0.683295
Epoch:3 Step:1024 Training_loss:0.760460 Training_loss_avg:0.680930
Epoch:3 Step:1032 Training_loss:0.610278 Training_loss_avg:0.675372
Epoch:3 Step:1040 Training_loss:0.730337 Training_loss_avg:0.676642
Epoch:3 Step:1048 Training_loss:0.661110 Training_loss_avg:0.676125
Epoch:3 Step:1056 Training_loss:0.694801 Training_loss_avg:0.677422
Epoch:3 Step:1064 Training_loss:0.766345 Training_loss_avg:0.679273
Epoch:3 Step:1072 Training_loss:0.704473 Training_loss_avg:0.680669
Epoch:3 Step:1080 Training_loss:0.712176 Training_loss_avg:0.682675
Epoch:3 Step:1088 Training_loss:0.759575 Training_loss_avg:0.683323
Epoch:3 Step:1096 Training_loss:0.649657 Training_loss_avg:0.683131
Epoch:3 Step:1104 Training_loss:0.672548 Training_loss_avg:0.682990
Epoch:3 Step:

52it [00:07,  6.60it/s]


Epoch:3 Step:1240 Val_loss:0.656446
Epoch:3 Step:1248 Training_loss:0.587095 Training_loss_avg:0.681976
Epoch:3 Step:1256 Training_loss:0.661789 Training_loss_avg:0.679262
Epoch:3 Step:1264 Training_loss:0.495694 Training_loss_avg:0.674377
Epoch:3 Step:1272 Training_loss:0.740429 Training_loss_avg:0.676166
Epoch:3 Step:1280 Training_loss:0.697616 Training_loss_avg:0.676479
Epoch:3 Step:1288 Training_loss:0.694461 Training_loss_avg:0.675617
Epoch:3 Step:1296 Training_loss:0.754630 Training_loss_avg:0.677616
Epoch:3 Step:1304 Training_loss:0.562800 Training_loss_avg:0.676720
Epoch:3 Step:1312 Training_loss:0.706160 Training_loss_avg:0.677985
Epoch:3 Step:1320 Training_loss:0.649426 Training_loss_avg:0.679019
Epoch:3 Step:1328 Training_loss:0.692469 Training_loss_avg:0.679432
Epoch:3 Step:1336 Training_loss:0.694807 Training_loss_avg:0.678896
Epoch:3 Step:1344 Training_loss:0.758968 Training_loss_avg:0.681938
Epoch:3 Step:1352 Training_loss:0.710634 Training_loss_avg:0.681812
Epoch:3 Step

52it [00:07,  6.60it/s]


Epoch:3 Step:1488 Val_loss:0.648593
Epoch:3 Step:1496 Training_loss:0.656192 Training_loss_avg:0.672163
Epoch:3 Step:1504 Training_loss:0.614449 Training_loss_avg:0.671001
Epoch:3 Step:1512 Training_loss:0.682037 Training_loss_avg:0.672577
Epoch:3 Step:1520 Training_loss:0.660149 Training_loss_avg:0.672457
Epoch:3 Step:1528 Training_loss:0.694648 Training_loss_avg:0.672152
Epoch:3 Step:1536 Training_loss:0.679004 Training_loss_avg:0.670726
Epoch:3 Step:1544 Training_loss:0.662882 Training_loss_avg:0.669788
Epoch:3 Step:1552 Training_loss:0.623070 Training_loss_avg:0.668811
Epoch:3 Step:1560 Training_loss:0.662028 Training_loss_avg:0.666311
Epoch:3 Step:1568 Training_loss:0.628720 Training_loss_avg:0.664638
Epoch:3 Step:1576 Training_loss:0.612627 Training_loss_avg:0.662562
Epoch:3 Step:1584 Training_loss:0.640027 Training_loss_avg:0.662855
Epoch:3 Step:1592 Training_loss:0.627508 Training_loss_avg:0.662414
Epoch:3 Step:1600 Training_loss:0.592968 Training_loss_avg:0.660359
Epoch:3 Step

52it [00:07,  6.59it/s]


Epoch:3 Step:1736 Val_loss:0.652399
Epoch:3 Step:1744 Training_loss:0.507676 Training_loss_avg:0.641046
Epoch:3 Step:1752 Training_loss:0.820364 Training_loss_avg:0.643240
Epoch:3 Step:1760 Training_loss:0.898053 Training_loss_avg:0.648183
Epoch:3 Step:1768 Training_loss:0.646796 Training_loss_avg:0.648255
Epoch:3 Step:1776 Training_loss:0.612670 Training_loss_avg:0.648821
Epoch:3 Step:1784 Training_loss:0.526463 Training_loss_avg:0.645703
Epoch:3 Step:1792 Training_loss:0.839302 Training_loss_avg:0.648411
Epoch:3 Step:1800 Training_loss:0.728655 Training_loss_avg:0.649538
Epoch:3 Step:1808 Training_loss:0.538703 Training_loss_avg:0.647482
Epoch:3 Step:1816 Training_loss:0.714217 Training_loss_avg:0.648975
Epoch:3 Step:1824 Training_loss:0.696821 Training_loss_avg:0.648058
Epoch:3 Step:1832 Training_loss:0.745629 Training_loss_avg:0.649630
Epoch:3 Step:1840 Training_loss:0.568390 Training_loss_avg:0.648341
Epoch:3 Step:1848 Training_loss:0.668855 Training_loss_avg:0.649263
Epoch:3 Step

52it [00:07,  6.60it/s]


Epoch:3 Step:1984 Val_loss:0.650796
Epoch:3 Step:1992 Training_loss:0.582061 Training_loss_avg:0.644187
Epoch:3 Step:2000 Training_loss:0.639063 Training_loss_avg:0.645109
Epoch:3 Step:2008 Training_loss:0.644367 Training_loss_avg:0.646269
Epoch:3 Step:2016 Training_loss:0.705638 Training_loss_avg:0.650305
Epoch:3 Step:2024 Training_loss:0.656580 Training_loss_avg:0.650564
Epoch:3 Step:2032 Training_loss:0.758410 Training_loss_avg:0.649486
Epoch:3 Step:2040 Training_loss:0.755962 Training_loss_avg:0.654938
Epoch:3 Step:2048 Training_loss:0.540102 Training_loss_avg:0.654629
Epoch:3 Step:2056 Training_loss:0.559080 Training_loss_avg:0.653410
Epoch:3 Step:2064 Training_loss:0.823528 Training_loss_avg:0.654552
Epoch:3 Step:2072 Training_loss:0.655134 Training_loss_avg:0.653684
Epoch:3 Step:2080 Training_loss:0.632479 Training_loss_avg:0.649069
Epoch:3 Step:2088 Training_loss:0.728962 Training_loss_avg:0.652226
Epoch:3 Step:2096 Training_loss:0.618379 Training_loss_avg:0.653637
Epoch:3 Step

52it [00:07,  6.60it/s]


Epoch:3 Step:2232 Val_loss:0.662376
Epoch:3 Step:2240 Training_loss:0.779148 Training_loss_avg:0.651238
Epoch:3 Step:2248 Training_loss:0.613483 Training_loss_avg:0.650130
Epoch:3 Step:2256 Training_loss:0.772449 Training_loss_avg:0.655314
Epoch:3 Step:2264 Training_loss:0.571892 Training_loss_avg:0.654471
Epoch:3 Step:2272 Training_loss:0.742133 Training_loss_avg:0.656497
Epoch:3 Step:2280 Training_loss:0.506498 Training_loss_avg:0.655385
Epoch:3 Step:2288 Training_loss:0.665581 Training_loss_avg:0.655501
Epoch:3 Step:2296 Training_loss:0.670805 Training_loss_avg:0.656962
Epoch:3 Step:2304 Training_loss:0.657930 Training_loss_avg:0.657924
Epoch:3 Step:2312 Training_loss:0.495639 Training_loss_avg:0.652276
Epoch:3 Step:2320 Training_loss:0.508031 Training_loss_avg:0.646518
Epoch:3 Step:2328 Training_loss:0.544272 Training_loss_avg:0.643762
Epoch:3 Step:2336 Training_loss:0.673071 Training_loss_avg:0.645254
Epoch:3 Step:2344 Training_loss:0.898766 Training_loss_avg:0.648667
Epoch:3 Step

52it [00:07,  6.59it/s]


Epoch:3 Step:2480 Val_loss:0.654945
Epoch:3 Step:2488 Training_loss:0.645244 Training_loss_avg:0.656336
Epoch:3 Step:2496 Training_loss:0.684173 Training_loss_avg:0.657652
Epoch:3 Step:2504 Training_loss:0.673201 Training_loss_avg:0.657173
Epoch:3 Step:2512 Training_loss:0.671179 Training_loss_avg:0.658700
Epoch:3 Step:2520 Training_loss:0.626875 Training_loss_avg:0.659810
Epoch:3 Step:2528 Training_loss:0.684182 Training_loss_avg:0.660658
Epoch:3 Step:2536 Training_loss:0.745636 Training_loss_avg:0.662368
Epoch:3 Step:2544 Training_loss:0.521201 Training_loss_avg:0.659892
Epoch:3 Step:2552 Training_loss:0.637232 Training_loss_avg:0.657490
Epoch:3 Step:2560 Training_loss:0.540474 Training_loss_avg:0.657364
Epoch:3 Step:2568 Training_loss:0.707710 Training_loss_avg:0.657240
Epoch:3 Step:2576 Training_loss:0.583699 Training_loss_avg:0.655711
Epoch:3 Step:2584 Training_loss:0.678120 Training_loss_avg:0.655761
Epoch:3 Step:2592 Training_loss:0.564942 Training_loss_avg:0.656929
Epoch:3 Step

52it [00:07,  6.59it/s]


Epoch:3 Step:2728 Val_loss:0.632241
Epoch:3 Step:2736 Training_loss:0.790664 Training_loss_avg:0.653105
Epoch:3 Step:2744 Training_loss:0.823610 Training_loss_avg:0.651601
Epoch:3 Step:2752 Training_loss:0.605380 Training_loss_avg:0.649474
Epoch:3 Step:2760 Training_loss:0.723904 Training_loss_avg:0.652187
Epoch:3 Step:2768 Training_loss:0.602997 Training_loss_avg:0.648775
Epoch:3 Step:2776 Training_loss:0.561198 Training_loss_avg:0.646371
Epoch:3 Step:2784 Training_loss:0.581959 Training_loss_avg:0.645544
Epoch:3 Step:2792 Training_loss:0.634333 Training_loss_avg:0.646381
Epoch:3 Step:2800 Training_loss:0.545488 Training_loss_avg:0.644180
Epoch:3 Step:2808 Training_loss:0.874911 Training_loss_avg:0.650127
Epoch:3 Step:2816 Training_loss:0.770795 Training_loss_avg:0.649102
Epoch:3 Step:2824 Training_loss:0.696314 Training_loss_avg:0.647819
Epoch:3 Step:2832 Training_loss:0.483504 Training_loss_avg:0.645564
Epoch:3 Step:2840 Training_loss:0.561027 Training_loss_avg:0.644119
Epoch:3 Step

52it [00:07,  6.60it/s]


Epoch:3 Step:2976 Val_loss:0.629523
Epoch:3 Step:2984 Training_loss:0.641412 Training_loss_avg:0.647461
Epoch:3 Step:2992 Training_loss:0.570204 Training_loss_avg:0.647566
Epoch:3 Step:3000 Training_loss:0.825099 Training_loss_avg:0.652765
Epoch:3 Step:3008 Training_loss:0.620828 Training_loss_avg:0.651894
Epoch:3 Step:3016 Training_loss:0.541715 Training_loss_avg:0.650608
Epoch:3 Step:3024 Training_loss:0.628872 Training_loss_avg:0.646443
Epoch:3 Step:3032 Training_loss:0.673314 Training_loss_avg:0.647341
Epoch:3 Step:3040 Training_loss:0.556736 Training_loss_avg:0.648993
Epoch:3 Step:3048 Training_loss:0.517615 Training_loss_avg:0.645565
Epoch:3 Step:3056 Training_loss:0.480223 Training_loss_avg:0.643971
Epoch:3 Step:3064 Training_loss:0.513167 Training_loss_avg:0.644631
Epoch:3 Step:3072 Training_loss:0.662794 Training_loss_avg:0.645323
Epoch:3 Step:3080 Training_loss:0.621800 Training_loss_avg:0.643815
Epoch:3 Step:3088 Training_loss:0.529924 Training_loss_avg:0.643105
Epoch:3 Step

52it [00:07,  6.59it/s]


Epoch:3 Step:3224 Val_loss:0.632746
Epoch:3 Step:3232 Training_loss:0.633207 Training_loss_avg:0.640803
Epoch:3 Step:3240 Training_loss:0.585322 Training_loss_avg:0.641289
Epoch:3 Step:3248 Training_loss:0.688690 Training_loss_avg:0.647670
Epoch:3 Step:3256 Training_loss:0.636927 Training_loss_avg:0.649700
Epoch:3 Step:3264 Training_loss:0.622940 Training_loss_avg:0.650131
Epoch:3 Step:3272 Training_loss:0.542208 Training_loss_avg:0.648375
Epoch:3 Step:3280 Training_loss:0.609018 Training_loss_avg:0.650401
Epoch:3 Step:3288 Training_loss:0.696683 Training_loss_avg:0.651725
Epoch:3 Step:3296 Training_loss:0.545704 Training_loss_avg:0.650962
Epoch:3 Step:3304 Training_loss:0.603312 Training_loss_avg:0.639019
Epoch:3 Step:3312 Training_loss:0.739702 Training_loss_avg:0.634243
Epoch:3 Step:3320 Training_loss:0.501473 Training_loss_avg:0.632108
Epoch:3 Step:3328 Training_loss:0.692543 Training_loss_avg:0.635987
Epoch:4 Step:0 Training_loss:0.731347 Training_loss_avg:0.637496
Validating:


52it [00:07,  6.59it/s]


Epoch:4 Step:0 Val_loss:0.624109
Epoch:4 Step:8 Training_loss:0.587905 Training_loss_avg:0.631924
Epoch:4 Step:16 Training_loss:0.513634 Training_loss_avg:0.630817
Epoch:4 Step:24 Training_loss:0.595743 Training_loss_avg:0.627211
Epoch:4 Step:32 Training_loss:0.677460 Training_loss_avg:0.628814
Epoch:4 Step:40 Training_loss:0.582107 Training_loss_avg:0.624660
Epoch:4 Step:48 Training_loss:0.649604 Training_loss_avg:0.624824
Epoch:4 Step:56 Training_loss:0.624545 Training_loss_avg:0.625911
Epoch:4 Step:64 Training_loss:0.556608 Training_loss_avg:0.620541
Epoch:4 Step:72 Training_loss:0.679858 Training_loss_avg:0.621722
Epoch:4 Step:80 Training_loss:0.690027 Training_loss_avg:0.624688
Epoch:4 Step:88 Training_loss:0.619468 Training_loss_avg:0.624500
Epoch:4 Step:96 Training_loss:0.566801 Training_loss_avg:0.622370
Epoch:4 Step:104 Training_loss:0.631335 Training_loss_avg:0.623862
Epoch:4 Step:112 Training_loss:0.495601 Training_loss_avg:0.623421
Epoch:4 Step:120 Training_loss:0.542735 Tr

52it [00:07,  6.59it/s]


Epoch:4 Step:248 Val_loss:0.649784
Epoch:4 Step:256 Training_loss:0.612623 Training_loss_avg:0.641632
Epoch:4 Step:264 Training_loss:0.687683 Training_loss_avg:0.640127
Epoch:4 Step:272 Training_loss:0.625174 Training_loss_avg:0.641269
Epoch:4 Step:280 Training_loss:0.859772 Training_loss_avg:0.644780
Epoch:4 Step:288 Training_loss:0.603802 Training_loss_avg:0.643878
Epoch:4 Step:296 Training_loss:0.590182 Training_loss_avg:0.643018
Epoch:4 Step:304 Training_loss:0.605327 Training_loss_avg:0.643418
Epoch:4 Step:312 Training_loss:0.661094 Training_loss_avg:0.642866
Epoch:4 Step:320 Training_loss:0.574959 Training_loss_avg:0.641626
Epoch:4 Step:328 Training_loss:0.534305 Training_loss_avg:0.639854
Epoch:4 Step:336 Training_loss:0.696068 Training_loss_avg:0.642931
Epoch:4 Step:344 Training_loss:0.563038 Training_loss_avg:0.642011
Epoch:4 Step:352 Training_loss:0.626568 Training_loss_avg:0.640609
Epoch:4 Step:360 Training_loss:0.612166 Training_loss_avg:0.641938
Epoch:4 Step:368 Training_l

52it [00:07,  6.60it/s]


Epoch:4 Step:496 Val_loss:0.636594
Epoch:4 Step:504 Training_loss:0.523574 Training_loss_avg:0.638011
Epoch:4 Step:512 Training_loss:0.613560 Training_loss_avg:0.640370
Epoch:4 Step:520 Training_loss:0.587383 Training_loss_avg:0.641263
Epoch:4 Step:528 Training_loss:0.512646 Training_loss_avg:0.640380
Epoch:4 Step:536 Training_loss:0.562131 Training_loss_avg:0.638881
Epoch:4 Step:544 Training_loss:0.780336 Training_loss_avg:0.638715
Epoch:4 Step:552 Training_loss:0.670641 Training_loss_avg:0.636605
Epoch:4 Step:560 Training_loss:0.505560 Training_loss_avg:0.632812
Epoch:4 Step:568 Training_loss:0.483862 Training_loss_avg:0.630236
Epoch:4 Step:576 Training_loss:0.588446 Training_loss_avg:0.626677
Epoch:4 Step:584 Training_loss:0.610310 Training_loss_avg:0.627932
Epoch:4 Step:592 Training_loss:0.560734 Training_loss_avg:0.625761
Epoch:4 Step:600 Training_loss:0.623756 Training_loss_avg:0.624214
Epoch:4 Step:608 Training_loss:0.484371 Training_loss_avg:0.621169
Epoch:4 Step:616 Training_l

52it [00:07,  6.60it/s]


Epoch:4 Step:744 Val_loss:0.654118
Epoch:4 Step:752 Training_loss:0.839071 Training_loss_avg:0.627379
Epoch:4 Step:760 Training_loss:0.806544 Training_loss_avg:0.631267
Epoch:4 Step:768 Training_loss:0.700330 Training_loss_avg:0.630883
Epoch:4 Step:776 Training_loss:0.576956 Training_loss_avg:0.629652
Epoch:4 Step:784 Training_loss:0.675796 Training_loss_avg:0.632702
Epoch:4 Step:792 Training_loss:0.501223 Training_loss_avg:0.631731
Epoch:4 Step:800 Training_loss:0.672864 Training_loss_avg:0.631982
Epoch:4 Step:808 Training_loss:0.523344 Training_loss_avg:0.630414
Epoch:4 Step:816 Training_loss:0.645613 Training_loss_avg:0.631718
Epoch:4 Step:824 Training_loss:0.591534 Training_loss_avg:0.633111
Epoch:4 Step:832 Training_loss:0.667929 Training_loss_avg:0.632127
Epoch:4 Step:840 Training_loss:0.729181 Training_loss_avg:0.631006
Epoch:4 Step:848 Training_loss:0.810517 Training_loss_avg:0.633507
Epoch:4 Step:856 Training_loss:0.689562 Training_loss_avg:0.633335
Epoch:4 Step:864 Training_l

52it [00:07,  6.60it/s]


Epoch:4 Step:992 Val_loss:0.629180
Epoch:4 Step:1000 Training_loss:0.733276 Training_loss_avg:0.657324
Epoch:4 Step:1008 Training_loss:0.508306 Training_loss_avg:0.657803
Epoch:4 Step:1016 Training_loss:0.717064 Training_loss_avg:0.659675
Epoch:4 Step:1024 Training_loss:0.599371 Training_loss_avg:0.659197
Epoch:4 Step:1032 Training_loss:0.569819 Training_loss_avg:0.655018
Epoch:4 Step:1040 Training_loss:0.670008 Training_loss_avg:0.656381
Epoch:4 Step:1048 Training_loss:0.620743 Training_loss_avg:0.656454
Epoch:4 Step:1056 Training_loss:0.538132 Training_loss_avg:0.658099
Epoch:4 Step:1064 Training_loss:0.563558 Training_loss_avg:0.654574
Epoch:4 Step:1072 Training_loss:0.718944 Training_loss_avg:0.657787
Epoch:4 Step:1080 Training_loss:0.541850 Training_loss_avg:0.657725
Epoch:4 Step:1088 Training_loss:0.421101 Training_loss_avg:0.653304
Epoch:4 Step:1096 Training_loss:0.687510 Training_loss_avg:0.654016
Epoch:4 Step:1104 Training_loss:0.456471 Training_loss_avg:0.649816
Epoch:4 Step:

52it [00:07,  6.60it/s]


Epoch:4 Step:1240 Val_loss:0.645417
Epoch:4 Step:1248 Training_loss:0.576996 Training_loss_avg:0.615263
Epoch:4 Step:1256 Training_loss:0.629028 Training_loss_avg:0.614053
Epoch:4 Step:1264 Training_loss:0.521487 Training_loss_avg:0.614184
Epoch:4 Step:1272 Training_loss:0.505639 Training_loss_avg:0.611472
Epoch:4 Step:1280 Training_loss:0.671903 Training_loss_avg:0.614534
Epoch:4 Step:1288 Training_loss:0.677222 Training_loss_avg:0.615846
Epoch:4 Step:1296 Training_loss:0.631766 Training_loss_avg:0.612435
Epoch:4 Step:1304 Training_loss:0.466311 Training_loss_avg:0.613731
Epoch:4 Step:1312 Training_loss:0.736160 Training_loss_avg:0.613165
Epoch:4 Step:1320 Training_loss:0.595707 Training_loss_avg:0.607690
Epoch:4 Step:1328 Training_loss:0.524634 Training_loss_avg:0.607757
Epoch:4 Step:1336 Training_loss:0.684802 Training_loss_avg:0.608865
Epoch:4 Step:1344 Training_loss:0.654346 Training_loss_avg:0.609100
Epoch:4 Step:1352 Training_loss:0.595716 Training_loss_avg:0.610773
Epoch:4 Step

52it [00:07,  6.59it/s]


Epoch:4 Step:1488 Val_loss:0.625280
Epoch:4 Step:1496 Training_loss:0.683975 Training_loss_avg:0.606455
Epoch:4 Step:1504 Training_loss:0.724490 Training_loss_avg:0.611816
Epoch:4 Step:1512 Training_loss:0.676273 Training_loss_avg:0.614897
Epoch:4 Step:1520 Training_loss:0.732108 Training_loss_avg:0.620141
Epoch:4 Step:1528 Training_loss:0.592989 Training_loss_avg:0.622550
Epoch:4 Step:1536 Training_loss:0.654044 Training_loss_avg:0.623218
Epoch:4 Step:1544 Training_loss:0.446002 Training_loss_avg:0.623020
Epoch:4 Step:1552 Training_loss:0.651673 Training_loss_avg:0.622717
Epoch:4 Step:1560 Training_loss:0.623187 Training_loss_avg:0.622825
Epoch:4 Step:1568 Training_loss:0.657971 Training_loss_avg:0.620207
Epoch:4 Step:1576 Training_loss:0.640266 Training_loss_avg:0.621246
Epoch:4 Step:1584 Training_loss:0.727800 Training_loss_avg:0.622631
Epoch:4 Step:1592 Training_loss:0.434504 Training_loss_avg:0.618774
Epoch:4 Step:1600 Training_loss:0.554908 Training_loss_avg:0.618225
Epoch:4 Step

52it [00:07,  6.59it/s]


Epoch:4 Step:1736 Val_loss:0.622588
Epoch:4 Step:1744 Training_loss:0.556707 Training_loss_avg:0.614230
Epoch:4 Step:1752 Training_loss:1.006899 Training_loss_avg:0.622454
Epoch:4 Step:1760 Training_loss:0.387919 Training_loss_avg:0.617047
Epoch:4 Step:1768 Training_loss:0.652540 Training_loss_avg:0.618678
Epoch:4 Step:1776 Training_loss:0.838424 Training_loss_avg:0.622302
Epoch:4 Step:1784 Training_loss:0.645071 Training_loss_avg:0.620330
Epoch:4 Step:1792 Training_loss:0.536672 Training_loss_avg:0.619935
Epoch:4 Step:1800 Training_loss:0.525347 Training_loss_avg:0.621318
Epoch:4 Step:1808 Training_loss:0.461782 Training_loss_avg:0.617920
Epoch:4 Step:1816 Training_loss:0.581437 Training_loss_avg:0.618931
Epoch:4 Step:1824 Training_loss:0.760086 Training_loss_avg:0.621243
Epoch:4 Step:1832 Training_loss:0.772265 Training_loss_avg:0.624973
Epoch:4 Step:1840 Training_loss:0.581615 Training_loss_avg:0.622306
Epoch:4 Step:1848 Training_loss:0.638788 Training_loss_avg:0.616908
Epoch:4 Step

52it [00:07,  6.60it/s]


Epoch:4 Step:1984 Val_loss:0.623550
Epoch:4 Step:1992 Training_loss:0.552097 Training_loss_avg:0.610408
Epoch:4 Step:2000 Training_loss:0.698022 Training_loss_avg:0.613270
Epoch:4 Step:2008 Training_loss:0.578002 Training_loss_avg:0.611344
Epoch:4 Step:2016 Training_loss:0.447602 Training_loss_avg:0.608397
Epoch:4 Step:2024 Training_loss:0.697474 Training_loss_avg:0.610710
Epoch:4 Step:2032 Training_loss:0.729518 Training_loss_avg:0.612510
Epoch:4 Step:2040 Training_loss:0.905365 Training_loss_avg:0.618497
Epoch:4 Step:2048 Training_loss:0.455628 Training_loss_avg:0.614838
Epoch:4 Step:2056 Training_loss:0.673609 Training_loss_avg:0.614307
Epoch:4 Step:2064 Training_loss:0.517219 Training_loss_avg:0.609074
Epoch:4 Step:2072 Training_loss:0.736033 Training_loss_avg:0.613415
Epoch:4 Step:2080 Training_loss:0.557726 Training_loss_avg:0.612363
Epoch:4 Step:2088 Training_loss:0.572761 Training_loss_avg:0.614948
Epoch:4 Step:2096 Training_loss:0.674270 Training_loss_avg:0.615580
Epoch:4 Step

52it [00:07,  6.60it/s]


Epoch:4 Step:2232 Val_loss:0.617241
Epoch:4 Step:2240 Training_loss:0.459550 Training_loss_avg:0.606323
Epoch:4 Step:2248 Training_loss:0.484802 Training_loss_avg:0.603243
Epoch:4 Step:2256 Training_loss:0.722338 Training_loss_avg:0.604678
Epoch:4 Step:2264 Training_loss:0.651591 Training_loss_avg:0.606178
Epoch:4 Step:2272 Training_loss:0.640754 Training_loss_avg:0.605887
Epoch:4 Step:2280 Training_loss:0.666758 Training_loss_avg:0.612070
Epoch:4 Step:2288 Training_loss:0.645330 Training_loss_avg:0.613047
Epoch:4 Step:2296 Training_loss:0.681981 Training_loss_avg:0.615754
Epoch:4 Step:2304 Training_loss:0.702460 Training_loss_avg:0.616464
Epoch:4 Step:2312 Training_loss:0.535779 Training_loss_avg:0.617768
Epoch:4 Step:2320 Training_loss:0.614803 Training_loss_avg:0.615891
Epoch:4 Step:2328 Training_loss:0.579744 Training_loss_avg:0.616664
Epoch:4 Step:2336 Training_loss:0.564110 Training_loss_avg:0.611445
Epoch:4 Step:2344 Training_loss:0.634624 Training_loss_avg:0.613020
Epoch:4 Step

52it [00:07,  6.59it/s]


Epoch:4 Step:2480 Val_loss:0.623778
Epoch:4 Step:2488 Training_loss:0.619390 Training_loss_avg:0.601715
Epoch:4 Step:2496 Training_loss:0.642826 Training_loss_avg:0.601086
Epoch:4 Step:2504 Training_loss:0.695317 Training_loss_avg:0.606295
Epoch:4 Step:2512 Training_loss:0.566724 Training_loss_avg:0.603239
Epoch:4 Step:2520 Training_loss:0.559338 Training_loss_avg:0.604123
Epoch:4 Step:2528 Training_loss:0.478552 Training_loss_avg:0.601727
Epoch:4 Step:2536 Training_loss:0.591865 Training_loss_avg:0.602038
Epoch:4 Step:2544 Training_loss:0.491528 Training_loss_avg:0.598788
Epoch:4 Step:2552 Training_loss:0.642443 Training_loss_avg:0.600187
Epoch:4 Step:2560 Training_loss:0.649421 Training_loss_avg:0.602556
Epoch:4 Step:2568 Training_loss:0.613470 Training_loss_avg:0.604984
Epoch:4 Step:2576 Training_loss:0.541360 Training_loss_avg:0.602745
Epoch:4 Step:2584 Training_loss:0.662369 Training_loss_avg:0.602215
Epoch:4 Step:2592 Training_loss:0.528312 Training_loss_avg:0.599997
Epoch:4 Step

52it [00:07,  6.59it/s]


Epoch:4 Step:2728 Val_loss:0.621376
Epoch:4 Step:2736 Training_loss:0.422087 Training_loss_avg:0.591484
Epoch:4 Step:2744 Training_loss:0.460664 Training_loss_avg:0.588004
Epoch:4 Step:2752 Training_loss:0.482236 Training_loss_avg:0.584042
Epoch:4 Step:2760 Training_loss:0.531222 Training_loss_avg:0.581373
Epoch:4 Step:2768 Training_loss:0.387616 Training_loss_avg:0.578274
Epoch:4 Step:2776 Training_loss:0.312744 Training_loss_avg:0.570324
Epoch:4 Step:2784 Training_loss:0.741455 Training_loss_avg:0.576067
Epoch:4 Step:2792 Training_loss:0.744930 Training_loss_avg:0.578820
Epoch:4 Step:2800 Training_loss:0.352293 Training_loss_avg:0.576632
Epoch:4 Step:2808 Training_loss:0.399892 Training_loss_avg:0.575172
Epoch:4 Step:2816 Training_loss:0.669260 Training_loss_avg:0.574858
Epoch:4 Step:2824 Training_loss:0.606003 Training_loss_avg:0.572505
Epoch:4 Step:2832 Training_loss:0.423642 Training_loss_avg:0.567720
Epoch:4 Step:2840 Training_loss:0.855914 Training_loss_avg:0.574796
Epoch:4 Step

52it [00:07,  6.59it/s]


Epoch:4 Step:2976 Val_loss:0.608617
Epoch:4 Step:2984 Training_loss:0.709286 Training_loss_avg:0.585043
Epoch:4 Step:2992 Training_loss:0.477352 Training_loss_avg:0.584024
Epoch:4 Step:3000 Training_loss:0.726199 Training_loss_avg:0.583708
Epoch:4 Step:3008 Training_loss:0.528489 Training_loss_avg:0.584934
Epoch:4 Step:3016 Training_loss:0.845472 Training_loss_avg:0.589228
Epoch:4 Step:3024 Training_loss:0.768583 Training_loss_avg:0.595344
Epoch:4 Step:3032 Training_loss:0.629802 Training_loss_avg:0.594658
Epoch:4 Step:3040 Training_loss:0.523464 Training_loss_avg:0.596940
Epoch:4 Step:3048 Training_loss:0.624184 Training_loss_avg:0.591322
Epoch:4 Step:3056 Training_loss:0.598589 Training_loss_avg:0.594393
Epoch:4 Step:3064 Training_loss:0.596492 Training_loss_avg:0.597307
Epoch:4 Step:3072 Training_loss:0.624251 Training_loss_avg:0.597592
Epoch:4 Step:3080 Training_loss:0.588648 Training_loss_avg:0.595481
Epoch:4 Step:3088 Training_loss:0.628144 Training_loss_avg:0.596875
Epoch:4 Step

52it [00:07,  6.60it/s]


Epoch:4 Step:3224 Val_loss:0.616340
Epoch:4 Step:3232 Training_loss:0.613373 Training_loss_avg:0.629591
Epoch:4 Step:3240 Training_loss:0.505437 Training_loss_avg:0.622581
Epoch:4 Step:3248 Training_loss:0.709724 Training_loss_avg:0.623712
Epoch:4 Step:3256 Training_loss:0.568526 Training_loss_avg:0.625494
Epoch:4 Step:3264 Training_loss:0.678735 Training_loss_avg:0.624455
Epoch:4 Step:3272 Training_loss:0.723248 Training_loss_avg:0.629633
Epoch:4 Step:3280 Training_loss:0.607972 Training_loss_avg:0.628613
Epoch:4 Step:3288 Training_loss:0.498253 Training_loss_avg:0.626108
Epoch:4 Step:3296 Training_loss:0.500447 Training_loss_avg:0.626766
Epoch:4 Step:3304 Training_loss:0.674307 Training_loss_avg:0.620357
Epoch:4 Step:3312 Training_loss:0.503636 Training_loss_avg:0.618956
Epoch:4 Step:3320 Training_loss:0.658010 Training_loss_avg:0.619850
Epoch:4 Step:3328 Training_loss:0.745311 Training_loss_avg:0.626624


In [8]:
def main():
    """
    Main configuration function for a given finetune run
    :return: None
    """

    run_name = "test_colab_GPU_highRAM_8_lr_5e-6_10epochs"
    model_name = 'codebert-base'
    checkpoint_location = None
    online = False

    code_df = preprocess_data(file_loc='code_dataset.jsonl')
    train_data, val_data, test_data = tokenize(code_df, model_name=model_name)

    # Loading model from checkpoint if location provided
    if online:
        model = AutoModelForSequenceClassification.from_pretrained("microsoft/codebert-base")
    elif checkpoint_location is None:
        model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)
    else:
        model = AutoModelForSequenceClassification.from_pretrained(checkpoint_location)

    train(model=model,
          train_data=train_data,
          val_data=val_data,
          epochs=10,
          batch_size=8,
          learning_rate=5e-6,
          validate_per=250,
          run_name=run_name,
          run_descrption="Colab with highRam, lr=5e-6, validate per 250, batch 8, 10epochs")


In [9]:
gc.collect()
torch.cuda.empty_cache()
main()

Insecure code counts: 3729, Total code counts: 8000, Proportion 0.466125


Some weights of the model checkpoint at codebert-base were not used when initializing RobertaForSequenceClassification: ['pooler.dense.bias', 'pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at codebert-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for p

Epoch:0 Step:0 Training_loss:0.695493 Training_loss_avg:0.695493
Validating:


52it [00:07,  6.62it/s]


Epoch:0 Step:0 Val_loss:0.744018
Epoch:0 Step:8 Training_loss:0.669641 Training_loss_avg:0.682567
Epoch:0 Step:16 Training_loss:0.613652 Training_loss_avg:0.659596
Epoch:0 Step:24 Training_loss:0.742142 Training_loss_avg:0.680232
Epoch:0 Step:32 Training_loss:0.797346 Training_loss_avg:0.703655
Epoch:0 Step:40 Training_loss:0.672647 Training_loss_avg:0.698487
Epoch:0 Step:48 Training_loss:0.787323 Training_loss_avg:0.711178
Epoch:0 Step:56 Training_loss:0.697136 Training_loss_avg:0.709423
Epoch:0 Step:64 Training_loss:0.741864 Training_loss_avg:0.713027
Epoch:0 Step:72 Training_loss:0.724831 Training_loss_avg:0.714208
Epoch:0 Step:80 Training_loss:0.725162 Training_loss_avg:0.715203
Epoch:0 Step:88 Training_loss:0.690169 Training_loss_avg:0.713117
Epoch:0 Step:96 Training_loss:0.721194 Training_loss_avg:0.713739
Epoch:0 Step:104 Training_loss:0.672107 Training_loss_avg:0.710765
Epoch:0 Step:112 Training_loss:0.680563 Training_loss_avg:0.708751
Epoch:0 Step:120 Training_loss:0.661499 Tr

52it [00:07,  6.63it/s]


Epoch:0 Step:248 Val_loss:0.687724
Epoch:0 Step:256 Training_loss:0.660867 Training_loss_avg:0.690073
Epoch:0 Step:264 Training_loss:0.584309 Training_loss_avg:0.686962
Epoch:0 Step:272 Training_loss:0.772963 Training_loss_avg:0.689419
Epoch:0 Step:280 Training_loss:0.695596 Training_loss_avg:0.689591
Epoch:0 Step:288 Training_loss:0.723633 Training_loss_avg:0.690511
Epoch:0 Step:296 Training_loss:0.636351 Training_loss_avg:0.689086
Epoch:0 Step:304 Training_loss:0.661938 Training_loss_avg:0.688390
Epoch:0 Step:312 Training_loss:0.704445 Training_loss_avg:0.688791
Epoch:0 Step:320 Training_loss:0.651089 Training_loss_avg:0.687871
Epoch:0 Step:328 Training_loss:0.582421 Training_loss_avg:0.685361
Epoch:0 Step:336 Training_loss:0.624487 Training_loss_avg:0.683945
Epoch:0 Step:344 Training_loss:0.692671 Training_loss_avg:0.684143
Epoch:0 Step:352 Training_loss:0.758729 Training_loss_avg:0.685801
Epoch:0 Step:360 Training_loss:0.668645 Training_loss_avg:0.685428
Epoch:0 Step:368 Training_l

52it [00:07,  6.63it/s]


Epoch:0 Step:496 Val_loss:0.684758
Epoch:0 Step:504 Training_loss:0.690726 Training_loss_avg:0.683849
Epoch:0 Step:512 Training_loss:0.822972 Training_loss_avg:0.686697
Epoch:0 Step:520 Training_loss:0.683710 Training_loss_avg:0.687141
Epoch:0 Step:528 Training_loss:0.685248 Training_loss_avg:0.687223
Epoch:0 Step:536 Training_loss:0.651340 Training_loss_avg:0.687144
Epoch:0 Step:544 Training_loss:0.583454 Training_loss_avg:0.684246
Epoch:0 Step:552 Training_loss:0.685194 Training_loss_avg:0.684216
Epoch:0 Step:560 Training_loss:0.618213 Training_loss_avg:0.683607
Epoch:0 Step:568 Training_loss:0.639047 Training_loss_avg:0.682371
Epoch:0 Step:576 Training_loss:0.611387 Training_loss_avg:0.679939
Epoch:0 Step:584 Training_loss:0.589954 Training_loss_avg:0.678393
Epoch:0 Step:592 Training_loss:0.712269 Training_loss_avg:0.678554
Epoch:0 Step:600 Training_loss:0.718262 Training_loss_avg:0.680392
Epoch:0 Step:608 Training_loss:0.668872 Training_loss_avg:0.679811
Epoch:0 Step:616 Training_l

52it [00:07,  6.63it/s]


Epoch:0 Step:744 Val_loss:0.685121
Epoch:0 Step:752 Training_loss:0.657709 Training_loss_avg:0.689583
Epoch:0 Step:760 Training_loss:0.697955 Training_loss_avg:0.690169
Epoch:0 Step:768 Training_loss:0.723853 Training_loss_avg:0.689073
Epoch:0 Step:776 Training_loss:0.845911 Training_loss_avg:0.689938
Epoch:0 Step:784 Training_loss:0.772354 Training_loss_avg:0.689834
Epoch:0 Step:792 Training_loss:0.652735 Training_loss_avg:0.688820
Epoch:0 Step:800 Training_loss:0.670276 Training_loss_avg:0.686625
Epoch:0 Step:808 Training_loss:0.695523 Training_loss_avg:0.688007
Epoch:0 Step:816 Training_loss:0.655057 Training_loss_avg:0.687658
Epoch:0 Step:824 Training_loss:0.685315 Training_loss_avg:0.686788
Epoch:0 Step:832 Training_loss:0.692737 Training_loss_avg:0.688498
Epoch:0 Step:840 Training_loss:0.644996 Training_loss_avg:0.688138
Epoch:0 Step:848 Training_loss:0.701009 Training_loss_avg:0.688407
Epoch:0 Step:856 Training_loss:0.694305 Training_loss_avg:0.688395
Epoch:0 Step:864 Training_l

52it [00:07,  6.63it/s]


Epoch:0 Step:992 Val_loss:0.691349
Epoch:0 Step:1000 Training_loss:0.680122 Training_loss_avg:0.699499
Epoch:0 Step:1008 Training_loss:0.690417 Training_loss_avg:0.699930
Epoch:0 Step:1016 Training_loss:0.666070 Training_loss_avg:0.700570
Epoch:0 Step:1024 Training_loss:0.656996 Training_loss_avg:0.701957
Epoch:0 Step:1032 Training_loss:0.700816 Training_loss_avg:0.697237
Epoch:0 Step:1040 Training_loss:0.708624 Training_loss_avg:0.699999
Epoch:0 Step:1048 Training_loss:0.671756 Training_loss_avg:0.698789
Epoch:0 Step:1056 Training_loss:0.651017 Training_loss_avg:0.698794
Epoch:0 Step:1064 Training_loss:0.677267 Training_loss_avg:0.697274
Epoch:0 Step:1072 Training_loss:0.693167 Training_loss_avg:0.694102
Epoch:0 Step:1080 Training_loss:0.705199 Training_loss_avg:0.694271
Epoch:0 Step:1088 Training_loss:0.703545 Training_loss_avg:0.691285
Epoch:0 Step:1096 Training_loss:0.689722 Training_loss_avg:0.692909
Epoch:0 Step:1104 Training_loss:0.647177 Training_loss_avg:0.691950
Epoch:0 Step:

52it [00:07,  6.62it/s]


Epoch:0 Step:1240 Val_loss:0.685111
Epoch:0 Step:1248 Training_loss:0.637928 Training_loss_avg:0.692789
Epoch:0 Step:1256 Training_loss:0.709908 Training_loss_avg:0.693101
Epoch:0 Step:1264 Training_loss:0.614188 Training_loss_avg:0.690386
Epoch:0 Step:1272 Training_loss:0.664958 Training_loss_avg:0.689849
Epoch:0 Step:1280 Training_loss:0.696386 Training_loss_avg:0.689595
Epoch:0 Step:1288 Training_loss:0.680943 Training_loss_avg:0.689049
Epoch:0 Step:1296 Training_loss:0.680507 Training_loss_avg:0.688470
Epoch:0 Step:1304 Training_loss:0.788763 Training_loss_avg:0.690034
Epoch:0 Step:1312 Training_loss:0.559657 Training_loss_avg:0.687230
Epoch:0 Step:1320 Training_loss:0.763207 Training_loss_avg:0.689981
Epoch:0 Step:1328 Training_loss:0.682788 Training_loss_avg:0.689418
Epoch:0 Step:1336 Training_loss:0.781602 Training_loss_avg:0.691598
Epoch:0 Step:1344 Training_loss:0.738235 Training_loss_avg:0.692220
Epoch:0 Step:1352 Training_loss:0.635007 Training_loss_avg:0.690908
Epoch:0 Step

52it [00:07,  6.62it/s]


Epoch:0 Step:1488 Val_loss:0.685001
Epoch:0 Step:1496 Training_loss:0.665091 Training_loss_avg:0.693540
Epoch:0 Step:1504 Training_loss:0.779944 Training_loss_avg:0.696196
Epoch:0 Step:1512 Training_loss:0.656077 Training_loss_avg:0.695576
Epoch:0 Step:1520 Training_loss:0.707681 Training_loss_avg:0.696646
Epoch:0 Step:1528 Training_loss:0.690736 Training_loss_avg:0.696937
Epoch:0 Step:1536 Training_loss:0.703730 Training_loss_avg:0.698151
Epoch:0 Step:1544 Training_loss:0.733247 Training_loss_avg:0.699699
Epoch:0 Step:1552 Training_loss:0.783353 Training_loss_avg:0.701086
Epoch:0 Step:1560 Training_loss:0.663846 Training_loss_avg:0.701423
Epoch:0 Step:1568 Training_loss:0.626461 Training_loss_avg:0.700151
Epoch:0 Step:1576 Training_loss:0.674217 Training_loss_avg:0.696941
Epoch:0 Step:1584 Training_loss:0.752972 Training_loss_avg:0.696224
Epoch:0 Step:1592 Training_loss:0.689462 Training_loss_avg:0.695878
Epoch:0 Step:1600 Training_loss:0.693586 Training_loss_avg:0.697062
Epoch:0 Step

52it [00:07,  6.63it/s]


Epoch:0 Step:1736 Val_loss:0.684664
Epoch:0 Step:1744 Training_loss:0.833233 Training_loss_avg:0.702094
Epoch:0 Step:1752 Training_loss:0.742497 Training_loss_avg:0.704244
Epoch:0 Step:1760 Training_loss:0.585893 Training_loss_avg:0.701443
Epoch:0 Step:1768 Training_loss:0.687376 Training_loss_avg:0.702646
Epoch:0 Step:1776 Training_loss:0.745504 Training_loss_avg:0.703942
Epoch:0 Step:1784 Training_loss:0.671071 Training_loss_avg:0.703608
Epoch:0 Step:1792 Training_loss:0.674653 Training_loss_avg:0.703022
Epoch:0 Step:1800 Training_loss:0.670932 Training_loss_avg:0.702938
Epoch:0 Step:1808 Training_loss:0.657805 Training_loss_avg:0.704464
Epoch:0 Step:1816 Training_loss:0.742154 Training_loss_avg:0.703744
Epoch:0 Step:1824 Training_loss:0.666784 Training_loss_avg:0.704029
Epoch:0 Step:1832 Training_loss:0.685377 Training_loss_avg:0.704684
Epoch:0 Step:1840 Training_loss:0.658486 Training_loss_avg:0.702428
Epoch:0 Step:1848 Training_loss:0.692759 Training_loss_avg:0.702936
Epoch:0 Step

52it [00:07,  6.63it/s]


Epoch:0 Step:1984 Val_loss:0.683455
Epoch:0 Step:1992 Training_loss:0.698133 Training_loss_avg:0.693919
Epoch:0 Step:2000 Training_loss:0.644560 Training_loss_avg:0.692939
Epoch:0 Step:2008 Training_loss:0.729097 Training_loss_avg:0.695010
Epoch:0 Step:2016 Training_loss:0.717965 Training_loss_avg:0.695276
Epoch:0 Step:2024 Training_loss:0.664533 Training_loss_avg:0.694719
Epoch:0 Step:2032 Training_loss:0.716793 Training_loss_avg:0.695187
Epoch:0 Step:2040 Training_loss:0.648680 Training_loss_avg:0.695376
Epoch:0 Step:2048 Training_loss:0.626406 Training_loss_avg:0.691773
Epoch:0 Step:2056 Training_loss:0.710356 Training_loss_avg:0.692735
Epoch:0 Step:2064 Training_loss:0.724546 Training_loss_avg:0.694580
Epoch:0 Step:2072 Training_loss:0.679603 Training_loss_avg:0.695536
Epoch:0 Step:2080 Training_loss:0.736161 Training_loss_avg:0.696445
Epoch:0 Step:2088 Training_loss:0.683724 Training_loss_avg:0.695797
Epoch:0 Step:2096 Training_loss:0.630214 Training_loss_avg:0.693185
Epoch:0 Step

52it [00:07,  6.62it/s]


Epoch:0 Step:2232 Val_loss:0.683434
Epoch:0 Step:2240 Training_loss:0.710256 Training_loss_avg:0.691391
Epoch:0 Step:2248 Training_loss:0.628475 Training_loss_avg:0.690106
Epoch:0 Step:2256 Training_loss:0.632183 Training_loss_avg:0.687927
Epoch:0 Step:2264 Training_loss:0.708410 Training_loss_avg:0.689047
Epoch:0 Step:2272 Training_loss:0.697072 Training_loss_avg:0.690607
Epoch:0 Step:2280 Training_loss:0.672875 Training_loss_avg:0.690554
Epoch:0 Step:2288 Training_loss:0.670010 Training_loss_avg:0.690926
Epoch:0 Step:2296 Training_loss:0.724538 Training_loss_avg:0.689655
Epoch:0 Step:2304 Training_loss:0.668750 Training_loss_avg:0.690289
Epoch:0 Step:2312 Training_loss:0.698456 Training_loss_avg:0.691050
Epoch:0 Step:2320 Training_loss:0.674343 Training_loss_avg:0.689695
Epoch:0 Step:2328 Training_loss:0.686356 Training_loss_avg:0.688931
Epoch:0 Step:2336 Training_loss:0.737433 Training_loss_avg:0.689368
Epoch:0 Step:2344 Training_loss:0.679264 Training_loss_avg:0.688830
Epoch:0 Step

52it [00:07,  6.62it/s]


Epoch:0 Step:2480 Val_loss:0.689655
Epoch:0 Step:2488 Training_loss:0.670626 Training_loss_avg:0.689065
Epoch:0 Step:2496 Training_loss:0.700281 Training_loss_avg:0.690467
Epoch:0 Step:2504 Training_loss:0.713353 Training_loss_avg:0.691025
Epoch:0 Step:2512 Training_loss:0.692545 Training_loss_avg:0.691205
Epoch:0 Step:2520 Training_loss:0.675434 Training_loss_avg:0.691779
Epoch:0 Step:2528 Training_loss:0.709522 Training_loss_avg:0.692878
Epoch:0 Step:2536 Training_loss:0.730685 Training_loss_avg:0.693753
Epoch:0 Step:2544 Training_loss:0.730492 Training_loss_avg:0.694531
Epoch:0 Step:2552 Training_loss:0.660636 Training_loss_avg:0.693235
Epoch:0 Step:2560 Training_loss:0.722535 Training_loss_avg:0.693916
Epoch:0 Step:2568 Training_loss:0.743242 Training_loss_avg:0.696670
Epoch:0 Step:2576 Training_loss:0.676115 Training_loss_avg:0.694274
Epoch:0 Step:2584 Training_loss:0.740918 Training_loss_avg:0.696033
Epoch:0 Step:2592 Training_loss:0.711760 Training_loss_avg:0.695009
Epoch:0 Step

52it [00:07,  6.62it/s]


Epoch:0 Step:2728 Val_loss:0.687676
Epoch:0 Step:2736 Training_loss:0.687557 Training_loss_avg:0.694609
Epoch:0 Step:2744 Training_loss:0.655464 Training_loss_avg:0.694133
Epoch:0 Step:2752 Training_loss:0.654506 Training_loss_avg:0.693998
Epoch:0 Step:2760 Training_loss:0.706514 Training_loss_avg:0.694382
Epoch:0 Step:2768 Training_loss:0.702562 Training_loss_avg:0.695116
Epoch:0 Step:2776 Training_loss:0.656319 Training_loss_avg:0.694191
Epoch:0 Step:2784 Training_loss:0.698390 Training_loss_avg:0.694054
Epoch:0 Step:2792 Training_loss:0.667271 Training_loss_avg:0.693841
Epoch:0 Step:2800 Training_loss:0.624029 Training_loss_avg:0.693025
Epoch:0 Step:2808 Training_loss:0.704123 Training_loss_avg:0.692714
Epoch:0 Step:2816 Training_loss:0.632481 Training_loss_avg:0.691136
Epoch:0 Step:2824 Training_loss:0.690944 Training_loss_avg:0.691832
Epoch:0 Step:2832 Training_loss:0.642905 Training_loss_avg:0.689831
Epoch:0 Step:2840 Training_loss:0.654341 Training_loss_avg:0.689137
Epoch:0 Step

52it [00:07,  6.62it/s]


Epoch:0 Step:2976 Val_loss:0.683743
Epoch:0 Step:2984 Training_loss:0.717596 Training_loss_avg:0.685248
Epoch:0 Step:2992 Training_loss:0.764722 Training_loss_avg:0.686307
Epoch:0 Step:3000 Training_loss:0.675716 Training_loss_avg:0.685927
Epoch:0 Step:3008 Training_loss:0.664903 Training_loss_avg:0.685732
Epoch:0 Step:3016 Training_loss:0.638704 Training_loss_avg:0.685577
Epoch:0 Step:3024 Training_loss:0.660273 Training_loss_avg:0.685132
Epoch:0 Step:3032 Training_loss:0.712550 Training_loss_avg:0.685269
Epoch:0 Step:3040 Training_loss:0.647167 Training_loss_avg:0.684003
Epoch:0 Step:3048 Training_loss:0.655657 Training_loss_avg:0.683617
Epoch:0 Step:3056 Training_loss:0.704919 Training_loss_avg:0.683976
Epoch:0 Step:3064 Training_loss:0.746030 Training_loss_avg:0.685220
Epoch:0 Step:3072 Training_loss:0.672571 Training_loss_avg:0.684844
Epoch:0 Step:3080 Training_loss:0.628345 Training_loss_avg:0.683431
Epoch:0 Step:3088 Training_loss:0.683677 Training_loss_avg:0.682969
Epoch:0 Step

52it [00:07,  6.62it/s]


Epoch:0 Step:3224 Val_loss:0.681989
Epoch:0 Step:3232 Training_loss:0.679235 Training_loss_avg:0.686836
Epoch:0 Step:3240 Training_loss:0.662732 Training_loss_avg:0.687004
Epoch:0 Step:3248 Training_loss:0.681130 Training_loss_avg:0.688019
Epoch:0 Step:3256 Training_loss:0.662212 Training_loss_avg:0.689155
Epoch:0 Step:3264 Training_loss:0.721625 Training_loss_avg:0.691304
Epoch:0 Step:3272 Training_loss:0.700138 Training_loss_avg:0.692468
Epoch:0 Step:3280 Training_loss:0.671558 Training_loss_avg:0.692291
Epoch:0 Step:3288 Training_loss:0.737828 Training_loss_avg:0.693979
Epoch:0 Step:3296 Training_loss:0.682258 Training_loss_avg:0.692607
Epoch:0 Step:3304 Training_loss:0.668779 Training_loss_avg:0.691228
Epoch:0 Step:3312 Training_loss:0.687601 Training_loss_avg:0.689727
Epoch:0 Step:3320 Training_loss:0.675243 Training_loss_avg:0.687009
Epoch:0 Step:3328 Training_loss:0.770686 Training_loss_avg:0.687509
Epoch:1 Step:0 Training_loss:0.675241 Training_loss_avg:0.687293
Validating:


52it [00:07,  6.61it/s]


Epoch:1 Step:0 Val_loss:0.683516
Epoch:1 Step:8 Training_loss:0.698641 Training_loss_avg:0.688616
Epoch:1 Step:16 Training_loss:0.650637 Training_loss_avg:0.685811
Epoch:1 Step:24 Training_loss:0.681643 Training_loss_avg:0.686606
Epoch:1 Step:32 Training_loss:0.739442 Training_loss_avg:0.688677
Epoch:1 Step:40 Training_loss:0.759710 Training_loss_avg:0.690606
Epoch:1 Step:48 Training_loss:0.693499 Training_loss_avg:0.690124
Epoch:1 Step:56 Training_loss:0.675030 Training_loss_avg:0.688330
Epoch:1 Step:64 Training_loss:0.675040 Training_loss_avg:0.688316
Epoch:1 Step:72 Training_loss:0.713634 Training_loss_avg:0.689291
Epoch:1 Step:80 Training_loss:0.743392 Training_loss_avg:0.691385
Epoch:1 Step:88 Training_loss:0.656954 Training_loss_avg:0.691318
Epoch:1 Step:96 Training_loss:0.687182 Training_loss_avg:0.690811
Epoch:1 Step:104 Training_loss:0.690252 Training_loss_avg:0.691673
Epoch:1 Step:112 Training_loss:0.717182 Training_loss_avg:0.692903
Epoch:1 Step:120 Training_loss:0.730150 Tr

52it [00:07,  6.62it/s]


Epoch:1 Step:248 Val_loss:0.680570
Epoch:1 Step:256 Training_loss:0.681382 Training_loss_avg:0.692991
Epoch:1 Step:264 Training_loss:0.672042 Training_loss_avg:0.692584
Epoch:1 Step:272 Training_loss:0.614959 Training_loss_avg:0.690087
Epoch:1 Step:280 Training_loss:0.673256 Training_loss_avg:0.689364
Epoch:1 Step:288 Training_loss:0.671797 Training_loss_avg:0.689338
Epoch:1 Step:296 Training_loss:0.680587 Training_loss_avg:0.689365
Epoch:1 Step:304 Training_loss:0.867176 Training_loss_avg:0.693453
Epoch:1 Step:312 Training_loss:0.697069 Training_loss_avg:0.693772
Epoch:1 Step:320 Training_loss:0.645413 Training_loss_avg:0.693436
Epoch:1 Step:328 Training_loss:0.709741 Training_loss_avg:0.693199
Epoch:1 Step:336 Training_loss:0.806666 Training_loss_avg:0.695329
Epoch:1 Step:344 Training_loss:0.583359 Training_loss_avg:0.693565
Epoch:1 Step:352 Training_loss:0.693577 Training_loss_avg:0.692680
Epoch:1 Step:360 Training_loss:0.746011 Training_loss_avg:0.693955
Epoch:1 Step:368 Training_l

52it [00:07,  6.62it/s]


Epoch:1 Step:496 Val_loss:0.677406
Epoch:1 Step:504 Training_loss:0.630839 Training_loss_avg:0.690399
Epoch:1 Step:512 Training_loss:0.705085 Training_loss_avg:0.690157
Epoch:1 Step:520 Training_loss:0.673168 Training_loss_avg:0.689017
Epoch:1 Step:528 Training_loss:0.607180 Training_loss_avg:0.687587
Epoch:1 Step:536 Training_loss:0.716672 Training_loss_avg:0.688262
Epoch:1 Step:544 Training_loss:0.764135 Training_loss_avg:0.689616
Epoch:1 Step:552 Training_loss:0.669996 Training_loss_avg:0.689640
Epoch:1 Step:560 Training_loss:0.720482 Training_loss_avg:0.690653
Epoch:1 Step:568 Training_loss:0.678467 Training_loss_avg:0.690637
Epoch:1 Step:576 Training_loss:0.708448 Training_loss_avg:0.690332
Epoch:1 Step:584 Training_loss:0.649343 Training_loss_avg:0.690448
Epoch:1 Step:592 Training_loss:0.743188 Training_loss_avg:0.691592
Epoch:1 Step:600 Training_loss:0.696134 Training_loss_avg:0.692360
Epoch:1 Step:608 Training_loss:0.711141 Training_loss_avg:0.692163
Epoch:1 Step:616 Training_l

52it [00:07,  6.63it/s]


Epoch:1 Step:744 Val_loss:0.678724
Epoch:1 Step:752 Training_loss:0.687137 Training_loss_avg:0.692061
Epoch:1 Step:760 Training_loss:0.676717 Training_loss_avg:0.690675
Epoch:1 Step:768 Training_loss:0.671380 Training_loss_avg:0.689492
Epoch:1 Step:776 Training_loss:0.683669 Training_loss_avg:0.689280
Epoch:1 Step:784 Training_loss:0.724490 Training_loss_avg:0.689503
Epoch:1 Step:792 Training_loss:0.654619 Training_loss_avg:0.689206
Epoch:1 Step:800 Training_loss:0.701042 Training_loss_avg:0.690526
Epoch:1 Step:808 Training_loss:0.731029 Training_loss_avg:0.692618
Epoch:1 Step:816 Training_loss:0.667761 Training_loss_avg:0.692454
Epoch:1 Step:824 Training_loss:0.785247 Training_loss_avg:0.695549
Epoch:1 Step:832 Training_loss:0.736436 Training_loss_avg:0.695925
Epoch:1 Step:840 Training_loss:0.699905 Training_loss_avg:0.695908
Epoch:1 Step:848 Training_loss:0.653020 Training_loss_avg:0.695370
Epoch:1 Step:856 Training_loss:0.692532 Training_loss_avg:0.696091
Epoch:1 Step:864 Training_l

52it [00:07,  6.62it/s]


Epoch:1 Step:992 Val_loss:0.684318
Epoch:1 Step:1000 Training_loss:0.727141 Training_loss_avg:0.694759
Epoch:1 Step:1008 Training_loss:0.644488 Training_loss_avg:0.693426
Epoch:1 Step:1016 Training_loss:0.707845 Training_loss_avg:0.694108
Epoch:1 Step:1024 Training_loss:0.695221 Training_loss_avg:0.695010
Epoch:1 Step:1032 Training_loss:0.697709 Training_loss_avg:0.694718
Epoch:1 Step:1040 Training_loss:0.691777 Training_loss_avg:0.692383
Epoch:1 Step:1048 Training_loss:0.659219 Training_loss_avg:0.692497
Epoch:1 Step:1056 Training_loss:0.656350 Training_loss_avg:0.691171
Epoch:1 Step:1064 Training_loss:0.668021 Training_loss_avg:0.690996
Epoch:1 Step:1072 Training_loss:0.682650 Training_loss_avg:0.690925
Epoch:1 Step:1080 Training_loss:0.668794 Training_loss_avg:0.690952
Epoch:1 Step:1088 Training_loss:0.698356 Training_loss_avg:0.691170
Epoch:1 Step:1096 Training_loss:0.733442 Training_loss_avg:0.691659
Epoch:1 Step:1104 Training_loss:0.656274 Training_loss_avg:0.690681
Epoch:1 Step:

52it [00:07,  6.63it/s]


Epoch:1 Step:1240 Val_loss:0.675111
Epoch:1 Step:1248 Training_loss:0.644386 Training_loss_avg:0.689822
Epoch:1 Step:1256 Training_loss:0.690855 Training_loss_avg:0.689788
Epoch:1 Step:1264 Training_loss:0.659081 Training_loss_avg:0.688854
Epoch:1 Step:1272 Training_loss:0.611856 Training_loss_avg:0.686210
Epoch:1 Step:1280 Training_loss:0.739428 Training_loss_avg:0.686654
Epoch:1 Step:1288 Training_loss:0.704241 Training_loss_avg:0.686857
Epoch:1 Step:1296 Training_loss:0.666885 Training_loss_avg:0.686777
Epoch:1 Step:1304 Training_loss:0.669076 Training_loss_avg:0.686395
Epoch:1 Step:1312 Training_loss:0.641610 Training_loss_avg:0.685615
Epoch:1 Step:1320 Training_loss:0.638763 Training_loss_avg:0.685403
Epoch:1 Step:1328 Training_loss:0.711927 Training_loss_avg:0.685345
Epoch:1 Step:1336 Training_loss:0.759149 Training_loss_avg:0.687057
Epoch:1 Step:1344 Training_loss:0.648587 Training_loss_avg:0.686723
Epoch:1 Step:1352 Training_loss:0.725711 Training_loss_avg:0.687545
Epoch:1 Step

52it [00:07,  6.63it/s]


Epoch:1 Step:1488 Val_loss:0.669932
Epoch:1 Step:1496 Training_loss:0.796125 Training_loss_avg:0.689397
Epoch:1 Step:1504 Training_loss:0.654529 Training_loss_avg:0.689363
Epoch:1 Step:1512 Training_loss:0.670527 Training_loss_avg:0.688366
Epoch:1 Step:1520 Training_loss:0.710504 Training_loss_avg:0.689035
Epoch:1 Step:1528 Training_loss:0.648852 Training_loss_avg:0.688348
Epoch:1 Step:1536 Training_loss:0.719093 Training_loss_avg:0.689183
Epoch:1 Step:1544 Training_loss:0.669796 Training_loss_avg:0.688309
Epoch:1 Step:1552 Training_loss:0.745625 Training_loss_avg:0.689059
Epoch:1 Step:1560 Training_loss:0.682527 Training_loss_avg:0.689359
Epoch:1 Step:1568 Training_loss:0.656690 Training_loss_avg:0.689365
Epoch:1 Step:1576 Training_loss:0.699730 Training_loss_avg:0.688430
Epoch:1 Step:1584 Training_loss:0.635020 Training_loss_avg:0.687150
Epoch:1 Step:1592 Training_loss:0.579535 Training_loss_avg:0.685022
Epoch:1 Step:1600 Training_loss:0.543278 Training_loss_avg:0.682656
Epoch:1 Step

52it [00:07,  6.63it/s]


Epoch:1 Step:1736 Val_loss:0.676665
Epoch:1 Step:1744 Training_loss:0.692389 Training_loss_avg:0.685090
Epoch:1 Step:1752 Training_loss:0.608481 Training_loss_avg:0.682745
Epoch:1 Step:1760 Training_loss:0.733125 Training_loss_avg:0.681620
Epoch:1 Step:1768 Training_loss:0.633605 Training_loss_avg:0.682059
Epoch:1 Step:1776 Training_loss:0.728432 Training_loss_avg:0.681962
Epoch:1 Step:1784 Training_loss:0.621667 Training_loss_avg:0.679926
Epoch:1 Step:1792 Training_loss:0.743724 Training_loss_avg:0.682813
Epoch:1 Step:1800 Training_loss:0.737836 Training_loss_avg:0.683150
Epoch:1 Step:1808 Training_loss:0.648586 Training_loss_avg:0.681346
Epoch:1 Step:1816 Training_loss:0.847480 Training_loss_avg:0.684310
Epoch:1 Step:1824 Training_loss:0.661851 Training_loss_avg:0.683821
Epoch:1 Step:1832 Training_loss:0.661559 Training_loss_avg:0.684801
Epoch:1 Step:1840 Training_loss:0.646175 Training_loss_avg:0.682166
Epoch:1 Step:1848 Training_loss:0.708027 Training_loss_avg:0.682488
Epoch:1 Step

52it [00:07,  6.63it/s]


Epoch:1 Step:1984 Val_loss:0.669875
Epoch:1 Step:1992 Training_loss:0.686167 Training_loss_avg:0.681209
Epoch:1 Step:2000 Training_loss:0.587520 Training_loss_avg:0.682094
Epoch:1 Step:2008 Training_loss:0.673614 Training_loss_avg:0.681513
Epoch:1 Step:2016 Training_loss:0.592866 Training_loss_avg:0.677730
Epoch:1 Step:2024 Training_loss:0.694007 Training_loss_avg:0.678249
Epoch:1 Step:2032 Training_loss:0.665827 Training_loss_avg:0.674415
Epoch:1 Step:2040 Training_loss:0.677204 Training_loss_avg:0.675363
Epoch:1 Step:2048 Training_loss:0.673835 Training_loss_avg:0.677999
Epoch:1 Step:2056 Training_loss:0.701876 Training_loss_avg:0.676378
Epoch:1 Step:2064 Training_loss:0.624665 Training_loss_avg:0.675449
Epoch:1 Step:2072 Training_loss:0.697459 Training_loss_avg:0.674540
Epoch:1 Step:2080 Training_loss:0.609682 Training_loss_avg:0.672464
Epoch:1 Step:2088 Training_loss:0.714953 Training_loss_avg:0.673739
Epoch:1 Step:2096 Training_loss:0.625354 Training_loss_avg:0.672610
Epoch:1 Step

52it [00:07,  6.63it/s]


Epoch:1 Step:2232 Val_loss:0.664239
Epoch:1 Step:2240 Training_loss:0.634512 Training_loss_avg:0.675562
Epoch:1 Step:2248 Training_loss:0.639048 Training_loss_avg:0.674183
Epoch:1 Step:2256 Training_loss:0.632040 Training_loss_avg:0.672393
Epoch:1 Step:2264 Training_loss:0.610489 Training_loss_avg:0.669318
Epoch:1 Step:2272 Training_loss:0.706131 Training_loss_avg:0.670279
Epoch:1 Step:2280 Training_loss:0.645009 Training_loss_avg:0.670361
Epoch:1 Step:2288 Training_loss:0.616373 Training_loss_avg:0.670234
Epoch:1 Step:2296 Training_loss:0.641584 Training_loss_avg:0.671453
Epoch:1 Step:2304 Training_loss:0.694400 Training_loss_avg:0.673282
Epoch:1 Step:2312 Training_loss:0.698377 Training_loss_avg:0.674267
Epoch:1 Step:2320 Training_loss:0.851175 Training_loss_avg:0.677053
Epoch:1 Step:2328 Training_loss:0.620044 Training_loss_avg:0.675313
Epoch:1 Step:2336 Training_loss:0.746635 Training_loss_avg:0.677638
Epoch:1 Step:2344 Training_loss:0.607284 Training_loss_avg:0.676474
Epoch:1 Step

52it [00:07,  6.63it/s]


Epoch:1 Step:2480 Val_loss:0.663337
Epoch:1 Step:2488 Training_loss:0.644639 Training_loss_avg:0.673792
Epoch:1 Step:2496 Training_loss:0.639479 Training_loss_avg:0.674074
Epoch:1 Step:2504 Training_loss:0.623650 Training_loss_avg:0.672132
Epoch:1 Step:2512 Training_loss:0.804395 Training_loss_avg:0.671982
Epoch:1 Step:2520 Training_loss:0.624844 Training_loss_avg:0.671227
Epoch:1 Step:2528 Training_loss:0.662835 Training_loss_avg:0.670684
Epoch:1 Step:2536 Training_loss:0.684069 Training_loss_avg:0.670305
Epoch:1 Step:2544 Training_loss:0.598769 Training_loss_avg:0.669275
Epoch:1 Step:2552 Training_loss:0.653383 Training_loss_avg:0.667757
Epoch:1 Step:2560 Training_loss:0.615380 Training_loss_avg:0.665804
Epoch:1 Step:2568 Training_loss:0.770896 Training_loss_avg:0.668257
Epoch:1 Step:2576 Training_loss:0.620288 Training_loss_avg:0.663600
Epoch:1 Step:2584 Training_loss:0.753938 Training_loss_avg:0.665400
Epoch:1 Step:2592 Training_loss:0.586797 Training_loss_avg:0.663798
Epoch:1 Step

52it [00:07,  6.63it/s]


Epoch:1 Step:2728 Val_loss:0.664702
Epoch:1 Step:2736 Training_loss:0.706736 Training_loss_avg:0.669344
Epoch:1 Step:2744 Training_loss:0.658626 Training_loss_avg:0.670371
Epoch:1 Step:2752 Training_loss:0.593980 Training_loss_avg:0.669473
Epoch:1 Step:2760 Training_loss:0.676796 Training_loss_avg:0.669103
Epoch:1 Step:2768 Training_loss:0.741034 Training_loss_avg:0.671423
Epoch:1 Step:2776 Training_loss:0.688204 Training_loss_avg:0.671375
Epoch:1 Step:2784 Training_loss:0.630287 Training_loss_avg:0.668954
Epoch:1 Step:2792 Training_loss:0.688437 Training_loss_avg:0.668378
Epoch:1 Step:2800 Training_loss:0.680166 Training_loss_avg:0.670312
Epoch:1 Step:2808 Training_loss:0.707718 Training_loss_avg:0.668843
Epoch:1 Step:2816 Training_loss:0.661368 Training_loss_avg:0.671233
Epoch:1 Step:2824 Training_loss:0.639311 Training_loss_avg:0.670290
Epoch:1 Step:2832 Training_loss:0.732747 Training_loss_avg:0.670793
Epoch:1 Step:2840 Training_loss:0.745713 Training_loss_avg:0.671978
Epoch:1 Step

52it [00:07,  6.63it/s]


Epoch:1 Step:2976 Val_loss:0.665498
Epoch:1 Step:2984 Training_loss:0.699765 Training_loss_avg:0.665768
Epoch:1 Step:2992 Training_loss:0.787686 Training_loss_avg:0.669785
Epoch:1 Step:3000 Training_loss:0.642966 Training_loss_avg:0.668274
Epoch:1 Step:3008 Training_loss:0.619992 Training_loss_avg:0.666331
Epoch:1 Step:3016 Training_loss:0.678115 Training_loss_avg:0.667028
Epoch:1 Step:3024 Training_loss:0.686264 Training_loss_avg:0.667685
Epoch:1 Step:3032 Training_loss:0.560568 Training_loss_avg:0.667086
Epoch:1 Step:3040 Training_loss:0.535003 Training_loss_avg:0.666079
Epoch:1 Step:3048 Training_loss:0.485003 Training_loss_avg:0.659347
Epoch:1 Step:3056 Training_loss:0.791717 Training_loss_avg:0.661956
Epoch:1 Step:3064 Training_loss:0.991960 Training_loss_avg:0.666817
Epoch:1 Step:3072 Training_loss:0.701028 Training_loss_avg:0.669993
Epoch:1 Step:3080 Training_loss:0.761929 Training_loss_avg:0.671495
Epoch:1 Step:3088 Training_loss:0.751441 Training_loss_avg:0.672870
Epoch:1 Step

52it [00:07,  6.63it/s]


Epoch:1 Step:3224 Val_loss:0.664979
Epoch:1 Step:3232 Training_loss:0.711133 Training_loss_avg:0.670055
Epoch:1 Step:3240 Training_loss:0.741137 Training_loss_avg:0.669964
Epoch:1 Step:3248 Training_loss:0.668895 Training_loss_avg:0.671562
Epoch:1 Step:3256 Training_loss:0.672221 Training_loss_avg:0.669426
Epoch:1 Step:3264 Training_loss:0.653280 Training_loss_avg:0.671425
Epoch:1 Step:3272 Training_loss:0.604507 Training_loss_avg:0.670445
Epoch:1 Step:3280 Training_loss:0.645185 Training_loss_avg:0.670657
Epoch:1 Step:3288 Training_loss:0.739802 Training_loss_avg:0.671065
Epoch:1 Step:3296 Training_loss:0.635532 Training_loss_avg:0.673366
Epoch:1 Step:3304 Training_loss:0.565762 Training_loss_avg:0.672819
Epoch:1 Step:3312 Training_loss:0.713671 Training_loss_avg:0.674842
Epoch:1 Step:3320 Training_loss:0.703837 Training_loss_avg:0.676154
Epoch:1 Step:3328 Training_loss:0.515425 Training_loss_avg:0.671672
Epoch:2 Step:0 Training_loss:0.632314 Training_loss_avg:0.668190
Validating:


52it [00:07,  6.62it/s]


Epoch:2 Step:0 Val_loss:0.673970
Epoch:2 Step:8 Training_loss:0.749046 Training_loss_avg:0.669717
Epoch:2 Step:16 Training_loss:0.608098 Training_loss_avg:0.668607
Epoch:2 Step:24 Training_loss:0.419243 Training_loss_avg:0.664532
Epoch:2 Step:32 Training_loss:0.627908 Training_loss_avg:0.667718
Epoch:2 Step:40 Training_loss:0.614007 Training_loss_avg:0.667490
Epoch:2 Step:48 Training_loss:0.687785 Training_loss_avg:0.667250
Epoch:2 Step:56 Training_loss:0.678583 Training_loss_avg:0.665068
Epoch:2 Step:64 Training_loss:0.908369 Training_loss_avg:0.670376
Epoch:2 Step:72 Training_loss:0.467212 Training_loss_avg:0.667321
Epoch:2 Step:80 Training_loss:0.753479 Training_loss_avg:0.668828
Epoch:2 Step:88 Training_loss:0.470747 Training_loss_avg:0.664518
Epoch:2 Step:96 Training_loss:0.782176 Training_loss_avg:0.668950
Epoch:2 Step:104 Training_loss:0.450787 Training_loss_avg:0.667266
Epoch:2 Step:112 Training_loss:0.883758 Training_loss_avg:0.675241
Epoch:2 Step:120 Training_loss:0.674783 Tr

52it [00:07,  6.63it/s]


Epoch:2 Step:248 Val_loss:0.667291
Epoch:2 Step:256 Training_loss:0.701784 Training_loss_avg:0.661187
Epoch:2 Step:264 Training_loss:0.562396 Training_loss_avg:0.659972
Epoch:2 Step:272 Training_loss:0.729152 Training_loss_avg:0.661030
Epoch:2 Step:280 Training_loss:0.724220 Training_loss_avg:0.662834
Epoch:2 Step:288 Training_loss:0.666876 Training_loss_avg:0.662228
Epoch:2 Step:296 Training_loss:0.540198 Training_loss_avg:0.658809
Epoch:2 Step:304 Training_loss:0.705703 Training_loss_avg:0.658101
Epoch:2 Step:312 Training_loss:0.644023 Training_loss_avg:0.657603
Epoch:2 Step:320 Training_loss:0.710814 Training_loss_avg:0.658375
Epoch:2 Step:328 Training_loss:0.682151 Training_loss_avg:0.658952
Epoch:2 Step:336 Training_loss:0.736453 Training_loss_avg:0.661591
Epoch:2 Step:344 Training_loss:0.587667 Training_loss_avg:0.660441
Epoch:2 Step:352 Training_loss:0.761048 Training_loss_avg:0.660866
Epoch:2 Step:360 Training_loss:0.595153 Training_loss_avg:0.660058
Epoch:2 Step:368 Training_l

52it [00:07,  6.62it/s]


Epoch:2 Step:496 Val_loss:0.657187
Epoch:2 Step:504 Training_loss:0.657043 Training_loss_avg:0.671655
Epoch:2 Step:512 Training_loss:0.764770 Training_loss_avg:0.669275
Epoch:2 Step:520 Training_loss:0.820561 Training_loss_avg:0.672191
Epoch:2 Step:528 Training_loss:0.565997 Training_loss_avg:0.670221
Epoch:2 Step:536 Training_loss:0.501135 Training_loss_avg:0.664673
Epoch:2 Step:544 Training_loss:0.733594 Training_loss_avg:0.666236
Epoch:2 Step:552 Training_loss:0.634505 Training_loss_avg:0.666166
Epoch:2 Step:560 Training_loss:0.653952 Training_loss_avg:0.662497
Epoch:2 Step:568 Training_loss:0.762308 Training_loss_avg:0.664425
Epoch:2 Step:576 Training_loss:0.704392 Training_loss_avg:0.664510
Epoch:2 Step:584 Training_loss:0.886490 Training_loss_avg:0.670909
Epoch:2 Step:592 Training_loss:0.651162 Training_loss_avg:0.670868
Epoch:2 Step:600 Training_loss:0.710859 Training_loss_avg:0.672676
Epoch:2 Step:608 Training_loss:0.639971 Training_loss_avg:0.674211
Epoch:2 Step:616 Training_l

52it [00:07,  6.62it/s]


Epoch:2 Step:744 Val_loss:0.650037
Epoch:2 Step:752 Training_loss:0.599161 Training_loss_avg:0.664691
Epoch:2 Step:760 Training_loss:0.649799 Training_loss_avg:0.665784
Epoch:2 Step:768 Training_loss:0.642463 Training_loss_avg:0.664619
Epoch:2 Step:776 Training_loss:0.711804 Training_loss_avg:0.666669
Epoch:2 Step:784 Training_loss:0.750993 Training_loss_avg:0.670036
Epoch:2 Step:792 Training_loss:0.714664 Training_loss_avg:0.670157
Epoch:2 Step:800 Training_loss:0.640547 Training_loss_avg:0.668972
Epoch:2 Step:808 Training_loss:0.701086 Training_loss_avg:0.670258
Epoch:2 Step:816 Training_loss:0.455336 Training_loss_avg:0.664899
Epoch:2 Step:824 Training_loss:0.600917 Training_loss_avg:0.663360
Epoch:2 Step:832 Training_loss:0.764109 Training_loss_avg:0.662658
Epoch:2 Step:840 Training_loss:0.642439 Training_loss_avg:0.659133
Epoch:2 Step:848 Training_loss:0.640890 Training_loss_avg:0.661475
Epoch:2 Step:856 Training_loss:0.566257 Training_loss_avg:0.659968
Epoch:2 Step:864 Training_l

52it [00:07,  6.62it/s]


Epoch:2 Step:992 Val_loss:0.652148
Epoch:2 Step:1000 Training_loss:0.875796 Training_loss_avg:0.666961
Epoch:2 Step:1008 Training_loss:0.528668 Training_loss_avg:0.664735
Epoch:2 Step:1016 Training_loss:0.769430 Training_loss_avg:0.665042
Epoch:2 Step:1024 Training_loss:0.723968 Training_loss_avg:0.667341
Epoch:2 Step:1032 Training_loss:0.755168 Training_loss_avg:0.667627
Epoch:2 Step:1040 Training_loss:0.635703 Training_loss_avg:0.669464
Epoch:2 Step:1048 Training_loss:0.849522 Training_loss_avg:0.672387
Epoch:2 Step:1056 Training_loss:0.723479 Training_loss_avg:0.674041
Epoch:2 Step:1064 Training_loss:0.665009 Training_loss_avg:0.674215
Epoch:2 Step:1072 Training_loss:0.719794 Training_loss_avg:0.676878
Epoch:2 Step:1080 Training_loss:0.672545 Training_loss_avg:0.677944
Epoch:2 Step:1088 Training_loss:0.639211 Training_loss_avg:0.676056
Epoch:2 Step:1096 Training_loss:0.636855 Training_loss_avg:0.676231
Epoch:2 Step:1104 Training_loss:0.557858 Training_loss_avg:0.676343
Epoch:2 Step:

52it [00:07,  6.63it/s]


Epoch:2 Step:1240 Val_loss:0.648114
Epoch:2 Step:1248 Training_loss:0.726897 Training_loss_avg:0.677100
Epoch:2 Step:1256 Training_loss:0.720158 Training_loss_avg:0.680178
Epoch:2 Step:1264 Training_loss:0.720534 Training_loss_avg:0.681433
Epoch:2 Step:1272 Training_loss:0.642265 Training_loss_avg:0.679512
Epoch:2 Step:1280 Training_loss:0.774719 Training_loss_avg:0.679601
Epoch:2 Step:1288 Training_loss:0.635319 Training_loss_avg:0.679849
Epoch:2 Step:1296 Training_loss:0.668035 Training_loss_avg:0.681813
Epoch:2 Step:1304 Training_loss:0.707292 Training_loss_avg:0.685639
Epoch:2 Step:1312 Training_loss:0.811154 Training_loss_avg:0.690465
Epoch:2 Step:1320 Training_loss:0.707970 Training_loss_avg:0.692387
Epoch:2 Step:1328 Training_loss:0.706693 Training_loss_avg:0.692942
Epoch:2 Step:1336 Training_loss:0.706764 Training_loss_avg:0.691661
Epoch:2 Step:1344 Training_loss:0.678268 Training_loss_avg:0.692394
Epoch:2 Step:1352 Training_loss:0.606145 Training_loss_avg:0.690926
Epoch:2 Step

52it [00:07,  6.63it/s]


Epoch:2 Step:1488 Val_loss:0.667529
Epoch:2 Step:1496 Training_loss:0.564692 Training_loss_avg:0.660992
Epoch:2 Step:1504 Training_loss:0.630260 Training_loss_avg:0.662440
Epoch:2 Step:1512 Training_loss:0.446825 Training_loss_avg:0.656640
Epoch:2 Step:1520 Training_loss:0.506184 Training_loss_avg:0.653061
Epoch:2 Step:1528 Training_loss:0.593108 Training_loss_avg:0.653109
Epoch:2 Step:1536 Training_loss:0.834750 Training_loss_avg:0.657100
Epoch:2 Step:1544 Training_loss:0.591067 Training_loss_avg:0.655115
Epoch:2 Step:1552 Training_loss:0.641220 Training_loss_avg:0.652376
Epoch:2 Step:1560 Training_loss:0.689340 Training_loss_avg:0.652357
Epoch:2 Step:1568 Training_loss:0.539671 Training_loss_avg:0.649795
Epoch:2 Step:1576 Training_loss:0.621233 Training_loss_avg:0.650003
Epoch:2 Step:1584 Training_loss:0.356537 Training_loss_avg:0.645177
Epoch:2 Step:1592 Training_loss:0.824166 Training_loss_avg:0.647852
Epoch:2 Step:1600 Training_loss:0.823204 Training_loss_avg:0.652042
Epoch:2 Step

52it [00:07,  6.63it/s]


Epoch:2 Step:1736 Val_loss:0.645212
Epoch:2 Step:1744 Training_loss:0.606569 Training_loss_avg:0.643122
Epoch:2 Step:1752 Training_loss:0.626665 Training_loss_avg:0.643533
Epoch:2 Step:1760 Training_loss:0.696431 Training_loss_avg:0.643659
Epoch:2 Step:1768 Training_loss:0.601412 Training_loss_avg:0.641379
Epoch:2 Step:1776 Training_loss:0.559808 Training_loss_avg:0.639921
Epoch:2 Step:1784 Training_loss:0.644702 Training_loss_avg:0.639808
Epoch:2 Step:1792 Training_loss:0.812446 Training_loss_avg:0.642907
Epoch:2 Step:1800 Training_loss:0.630890 Training_loss_avg:0.643449
Epoch:2 Step:1808 Training_loss:0.707965 Training_loss_avg:0.643837
Epoch:2 Step:1816 Training_loss:0.573040 Training_loss_avg:0.643494
Epoch:2 Step:1824 Training_loss:0.591147 Training_loss_avg:0.639466
Epoch:2 Step:1832 Training_loss:0.627736 Training_loss_avg:0.639298
Epoch:2 Step:1840 Training_loss:0.668918 Training_loss_avg:0.641799
Epoch:2 Step:1848 Training_loss:0.546838 Training_loss_avg:0.640866
Epoch:2 Step

52it [00:07,  6.63it/s]


Epoch:2 Step:1984 Val_loss:0.652091
Epoch:2 Step:1992 Training_loss:0.651229 Training_loss_avg:0.661288
Epoch:2 Step:2000 Training_loss:0.584924 Training_loss_avg:0.656522
Epoch:2 Step:2008 Training_loss:0.637876 Training_loss_avg:0.657836
Epoch:2 Step:2016 Training_loss:0.647769 Training_loss_avg:0.654253
Epoch:2 Step:2024 Training_loss:0.778752 Training_loss_avg:0.654437
Epoch:2 Step:2032 Training_loss:0.673761 Training_loss_avg:0.654731
Epoch:2 Step:2040 Training_loss:0.738783 Training_loss_avg:0.659341
Epoch:2 Step:2048 Training_loss:0.615232 Training_loss_avg:0.658703
Epoch:2 Step:2056 Training_loss:0.680905 Training_loss_avg:0.657349
Epoch:2 Step:2064 Training_loss:0.595835 Training_loss_avg:0.654439
Epoch:2 Step:2072 Training_loss:0.640057 Training_loss_avg:0.654179
Epoch:2 Step:2080 Training_loss:0.644256 Training_loss_avg:0.655142
Epoch:2 Step:2088 Training_loss:0.674929 Training_loss_avg:0.652108
Epoch:2 Step:2096 Training_loss:0.566206 Training_loss_avg:0.648850
Epoch:2 Step

52it [00:07,  6.63it/s]


Epoch:2 Step:2232 Val_loss:0.643124
Epoch:2 Step:2240 Training_loss:0.729000 Training_loss_avg:0.655003
Epoch:2 Step:2248 Training_loss:0.631227 Training_loss_avg:0.656690
Epoch:2 Step:2256 Training_loss:0.705733 Training_loss_avg:0.655327
Epoch:2 Step:2264 Training_loss:0.522760 Training_loss_avg:0.653702
Epoch:2 Step:2272 Training_loss:0.707857 Training_loss_avg:0.655474
Epoch:2 Step:2280 Training_loss:0.707570 Training_loss_avg:0.655820
Epoch:2 Step:2288 Training_loss:0.660235 Training_loss_avg:0.657102
Epoch:2 Step:2296 Training_loss:0.627935 Training_loss_avg:0.654150
Epoch:2 Step:2304 Training_loss:0.576782 Training_loss_avg:0.654727
Epoch:2 Step:2312 Training_loss:0.670684 Training_loss_avg:0.653745
Epoch:2 Step:2320 Training_loss:0.584436 Training_loss_avg:0.653380
Epoch:2 Step:2328 Training_loss:0.716388 Training_loss_avg:0.655675
Epoch:2 Step:2336 Training_loss:0.669911 Training_loss_avg:0.656333
Epoch:2 Step:2344 Training_loss:0.644039 Training_loss_avg:0.655775
Epoch:2 Step

52it [00:07,  6.64it/s]


Epoch:2 Step:2480 Val_loss:0.667107
Epoch:2 Step:2488 Training_loss:0.725423 Training_loss_avg:0.634997
Epoch:2 Step:2496 Training_loss:0.798613 Training_loss_avg:0.639645
Epoch:2 Step:2504 Training_loss:0.678669 Training_loss_avg:0.640257
Epoch:2 Step:2512 Training_loss:0.530315 Training_loss_avg:0.639166
Epoch:2 Step:2520 Training_loss:0.628478 Training_loss_avg:0.638180
Epoch:2 Step:2528 Training_loss:0.655086 Training_loss_avg:0.636885
Epoch:2 Step:2536 Training_loss:0.572157 Training_loss_avg:0.632853
Epoch:2 Step:2544 Training_loss:0.718637 Training_loss_avg:0.632586
Epoch:2 Step:2552 Training_loss:0.621512 Training_loss_avg:0.634056
Epoch:2 Step:2560 Training_loss:0.613827 Training_loss_avg:0.633851
Epoch:2 Step:2568 Training_loss:0.465207 Training_loss_avg:0.628865
Epoch:2 Step:2576 Training_loss:0.589816 Training_loss_avg:0.630753
Epoch:2 Step:2584 Training_loss:0.750886 Training_loss_avg:0.630010
Epoch:2 Step:2592 Training_loss:0.574663 Training_loss_avg:0.629715
Epoch:2 Step

52it [00:07,  6.63it/s]


Epoch:2 Step:2728 Val_loss:0.644382
Epoch:2 Step:2736 Training_loss:0.522663 Training_loss_avg:0.635340
Epoch:2 Step:2744 Training_loss:0.517171 Training_loss_avg:0.632803
Epoch:2 Step:2752 Training_loss:0.558590 Training_loss_avg:0.632093
Epoch:2 Step:2760 Training_loss:0.593019 Training_loss_avg:0.632629
Epoch:2 Step:2768 Training_loss:0.628411 Training_loss_avg:0.636623
Epoch:2 Step:2776 Training_loss:0.787454 Training_loss_avg:0.640658
Epoch:2 Step:2784 Training_loss:0.687255 Training_loss_avg:0.641832
Epoch:2 Step:2792 Training_loss:0.917549 Training_loss_avg:0.645899
Epoch:2 Step:2800 Training_loss:0.526590 Training_loss_avg:0.641599
Epoch:2 Step:2808 Training_loss:0.653456 Training_loss_avg:0.642929
Epoch:2 Step:2816 Training_loss:0.769197 Training_loss_avg:0.648012
Epoch:2 Step:2824 Training_loss:0.617442 Training_loss_avg:0.647683
Epoch:2 Step:2832 Training_loss:0.708555 Training_loss_avg:0.652556
Epoch:2 Step:2840 Training_loss:0.687014 Training_loss_avg:0.653659
Epoch:2 Step

52it [00:07,  6.63it/s]


Epoch:2 Step:2976 Val_loss:0.635603
Epoch:2 Step:2984 Training_loss:0.575970 Training_loss_avg:0.644968
Epoch:2 Step:2992 Training_loss:0.581850 Training_loss_avg:0.645112
Epoch:2 Step:3000 Training_loss:0.640558 Training_loss_avg:0.640428
Epoch:2 Step:3008 Training_loss:0.703826 Training_loss_avg:0.639817
Epoch:2 Step:3016 Training_loss:0.695978 Training_loss_avg:0.639180
Epoch:2 Step:3024 Training_loss:0.603389 Training_loss_avg:0.634653
Epoch:2 Step:3032 Training_loss:0.494055 Training_loss_avg:0.629347
Epoch:2 Step:3040 Training_loss:0.670529 Training_loss_avg:0.630877
Epoch:2 Step:3048 Training_loss:0.734219 Training_loss_avg:0.635103
Epoch:2 Step:3056 Training_loss:0.433093 Training_loss_avg:0.629152
Epoch:2 Step:3064 Training_loss:0.774590 Training_loss_avg:0.629786
Epoch:2 Step:3072 Training_loss:0.641449 Training_loss_avg:0.631533
Epoch:2 Step:3080 Training_loss:0.522892 Training_loss_avg:0.630465
Epoch:2 Step:3088 Training_loss:0.762927 Training_loss_avg:0.634985
Epoch:2 Step

52it [00:07,  6.63it/s]


Epoch:2 Step:3224 Val_loss:0.704993
Epoch:2 Step:3232 Training_loss:0.759149 Training_loss_avg:0.621352
Epoch:2 Step:3240 Training_loss:0.309010 Training_loss_avg:0.613792
Epoch:2 Step:3248 Training_loss:0.625338 Training_loss_avg:0.614707
Epoch:2 Step:3256 Training_loss:0.721829 Training_loss_avg:0.615015
Epoch:2 Step:3264 Training_loss:0.614712 Training_loss_avg:0.614687
Epoch:2 Step:3272 Training_loss:0.490811 Training_loss_avg:0.611305
Epoch:2 Step:3280 Training_loss:0.611729 Training_loss_avg:0.610710
Epoch:2 Step:3288 Training_loss:0.993210 Training_loss_avg:0.618576
Epoch:2 Step:3296 Training_loss:0.608792 Training_loss_avg:0.619729
Epoch:2 Step:3304 Training_loss:0.552290 Training_loss_avg:0.620953
Epoch:2 Step:3312 Training_loss:0.487582 Training_loss_avg:0.617793
Epoch:2 Step:3320 Training_loss:0.641843 Training_loss_avg:0.617683
Epoch:2 Step:3328 Training_loss:0.567009 Training_loss_avg:0.614742
Epoch:3 Step:0 Training_loss:0.759489 Training_loss_avg:0.616170
Validating:


52it [00:07,  6.62it/s]


Epoch:3 Step:0 Val_loss:0.636881
Epoch:3 Step:8 Training_loss:0.652917 Training_loss_avg:0.616657
Epoch:3 Step:16 Training_loss:0.444506 Training_loss_avg:0.613896
Epoch:3 Step:24 Training_loss:0.533627 Training_loss_avg:0.615652
Epoch:3 Step:32 Training_loss:0.568568 Training_loss_avg:0.615705
Epoch:3 Step:40 Training_loss:0.552585 Training_loss_avg:0.610407
Epoch:3 Step:48 Training_loss:0.517421 Training_loss_avg:0.609236
Epoch:3 Step:56 Training_loss:0.545271 Training_loss_avg:0.608504
Epoch:3 Step:64 Training_loss:0.521237 Training_loss_avg:0.606118
Epoch:3 Step:72 Training_loss:0.661989 Training_loss_avg:0.605281
Epoch:3 Step:80 Training_loss:0.674257 Training_loss_avg:0.604847
Epoch:3 Step:88 Training_loss:0.599867 Training_loss_avg:0.604776
Epoch:3 Step:96 Training_loss:0.761393 Training_loss_avg:0.610123
Epoch:3 Step:104 Training_loss:0.521992 Training_loss_avg:0.607152
Epoch:3 Step:112 Training_loss:0.573627 Training_loss_avg:0.603940
Epoch:3 Step:120 Training_loss:0.501622 Tr

52it [00:07,  6.62it/s]


Epoch:3 Step:248 Val_loss:0.649649
Epoch:3 Step:256 Training_loss:0.715594 Training_loss_avg:0.644350
Epoch:3 Step:264 Training_loss:0.741008 Training_loss_avg:0.646011
Epoch:3 Step:272 Training_loss:0.698908 Training_loss_avg:0.641668
Epoch:3 Step:280 Training_loss:0.498548 Training_loss_avg:0.638834
Epoch:3 Step:288 Training_loss:0.744507 Training_loss_avg:0.641914
Epoch:3 Step:296 Training_loss:0.592035 Training_loss_avg:0.638572
Epoch:3 Step:304 Training_loss:0.717265 Training_loss_avg:0.646737
Epoch:3 Step:312 Training_loss:0.606677 Training_loss_avg:0.646364
Epoch:3 Step:320 Training_loss:0.537711 Training_loss_avg:0.642681
Epoch:3 Step:328 Training_loss:0.684418 Training_loss_avg:0.644075
Epoch:3 Step:336 Training_loss:0.595777 Training_loss_avg:0.646175
Epoch:3 Step:344 Training_loss:0.517149 Training_loss_avg:0.644283
Epoch:3 Step:352 Training_loss:0.629299 Training_loss_avg:0.637005
Epoch:3 Step:360 Training_loss:0.617580 Training_loss_avg:0.637181
Epoch:3 Step:368 Training_l

52it [00:07,  6.63it/s]


Epoch:3 Step:496 Val_loss:0.632128
Epoch:3 Step:504 Training_loss:0.606202 Training_loss_avg:0.649019
Epoch:3 Step:512 Training_loss:0.673804 Training_loss_avg:0.651023
Epoch:3 Step:520 Training_loss:0.697664 Training_loss_avg:0.654944
Epoch:3 Step:528 Training_loss:0.633383 Training_loss_avg:0.651993
Epoch:3 Step:536 Training_loss:0.696257 Training_loss_avg:0.653604
Epoch:3 Step:544 Training_loss:0.599328 Training_loss_avg:0.654003
Epoch:3 Step:552 Training_loss:0.528277 Training_loss_avg:0.653386
Epoch:3 Step:560 Training_loss:0.651135 Training_loss_avg:0.650929
Epoch:3 Step:568 Training_loss:0.693161 Training_loss_avg:0.644791
Epoch:3 Step:576 Training_loss:0.716677 Training_loss_avg:0.638195
Epoch:3 Step:584 Training_loss:0.528600 Training_loss_avg:0.637376
Epoch:3 Step:592 Training_loss:0.677742 Training_loss_avg:0.635877
Epoch:3 Step:600 Training_loss:0.517568 Training_loss_avg:0.635297
Epoch:3 Step:608 Training_loss:0.616929 Training_loss_avg:0.634578
Epoch:3 Step:616 Training_l

52it [00:07,  6.63it/s]


Epoch:3 Step:744 Val_loss:0.647675
Epoch:3 Step:752 Training_loss:0.563429 Training_loss_avg:0.616312
Epoch:3 Step:760 Training_loss:0.501299 Training_loss_avg:0.613986
Epoch:3 Step:768 Training_loss:0.582200 Training_loss_avg:0.614890
Epoch:3 Step:776 Training_loss:0.747596 Training_loss_avg:0.616649
Epoch:3 Step:784 Training_loss:0.520847 Training_loss_avg:0.614522
Epoch:3 Step:792 Training_loss:0.862529 Training_loss_avg:0.622028
Epoch:3 Step:800 Training_loss:0.713125 Training_loss_avg:0.626818
Epoch:3 Step:808 Training_loss:0.699573 Training_loss_avg:0.624298
Epoch:3 Step:816 Training_loss:0.819231 Training_loss_avg:0.632876
Epoch:3 Step:824 Training_loss:0.709282 Training_loss_avg:0.637998
Epoch:3 Step:832 Training_loss:0.746879 Training_loss_avg:0.639475
Epoch:3 Step:840 Training_loss:0.402578 Training_loss_avg:0.633804
Epoch:3 Step:848 Training_loss:0.839454 Training_loss_avg:0.640687
Epoch:3 Step:856 Training_loss:0.584913 Training_loss_avg:0.636333
Epoch:3 Step:864 Training_l

52it [00:07,  6.61it/s]


Epoch:3 Step:992 Val_loss:0.647727
Epoch:3 Step:1000 Training_loss:0.694872 Training_loss_avg:0.634190
Epoch:3 Step:1008 Training_loss:0.542834 Training_loss_avg:0.632708
Epoch:3 Step:1016 Training_loss:0.684487 Training_loss_avg:0.634831
Epoch:3 Step:1024 Training_loss:0.789641 Training_loss_avg:0.639903
Epoch:3 Step:1032 Training_loss:0.618908 Training_loss_avg:0.641521
Epoch:3 Step:1040 Training_loss:0.900174 Training_loss_avg:0.650586
Epoch:3 Step:1048 Training_loss:0.667366 Training_loss_avg:0.651871
Epoch:3 Step:1056 Training_loss:0.427438 Training_loss_avg:0.649079
Epoch:3 Step:1064 Training_loss:0.683577 Training_loss_avg:0.653564
Epoch:3 Step:1072 Training_loss:0.542618 Training_loss_avg:0.649125
Epoch:3 Step:1080 Training_loss:0.531278 Training_loss_avg:0.645679
Epoch:3 Step:1088 Training_loss:0.718635 Training_loss_avg:0.649779
Epoch:3 Step:1096 Training_loss:0.711905 Training_loss_avg:0.653166
Epoch:3 Step:1104 Training_loss:0.647533 Training_loss_avg:0.654086
Epoch:3 Step:

52it [00:07,  6.61it/s]


Epoch:3 Step:1240 Val_loss:0.620107
Epoch:3 Step:1248 Training_loss:0.554033 Training_loss_avg:0.628768
Epoch:3 Step:1256 Training_loss:0.473915 Training_loss_avg:0.626548
Epoch:3 Step:1264 Training_loss:0.538105 Training_loss_avg:0.618667
Epoch:3 Step:1272 Training_loss:0.546544 Training_loss_avg:0.618156
Epoch:3 Step:1280 Training_loss:0.590113 Training_loss_avg:0.617867
Epoch:3 Step:1288 Training_loss:0.655791 Training_loss_avg:0.617478
Epoch:3 Step:1296 Training_loss:0.607457 Training_loss_avg:0.616249
Epoch:3 Step:1304 Training_loss:0.779720 Training_loss_avg:0.616562
Epoch:3 Step:1312 Training_loss:0.726954 Training_loss_avg:0.620342
Epoch:3 Step:1320 Training_loss:0.595889 Training_loss_avg:0.620201
Epoch:3 Step:1328 Training_loss:0.550393 Training_loss_avg:0.618004
Epoch:3 Step:1336 Training_loss:0.501877 Training_loss_avg:0.620454
Epoch:3 Step:1344 Training_loss:0.788295 Training_loss_avg:0.623289
Epoch:3 Step:1352 Training_loss:0.906057 Training_loss_avg:0.630276
Epoch:3 Step

52it [00:07,  6.61it/s]


Epoch:3 Step:1488 Val_loss:0.615815
Epoch:3 Step:1496 Training_loss:0.605323 Training_loss_avg:0.604218
Epoch:3 Step:1504 Training_loss:0.653648 Training_loss_avg:0.604341
Epoch:3 Step:1512 Training_loss:0.564552 Training_loss_avg:0.601960
Epoch:3 Step:1520 Training_loss:0.728738 Training_loss_avg:0.603659
Epoch:3 Step:1528 Training_loss:0.625774 Training_loss_avg:0.602629
Epoch:3 Step:1536 Training_loss:0.543089 Training_loss_avg:0.600700
Epoch:3 Step:1544 Training_loss:0.692083 Training_loss_avg:0.601825
Epoch:3 Step:1552 Training_loss:0.838689 Training_loss_avg:0.609671
Epoch:3 Step:1560 Training_loss:0.616060 Training_loss_avg:0.606257
Epoch:3 Step:1568 Training_loss:0.527108 Training_loss_avg:0.606763
Epoch:3 Step:1576 Training_loss:0.771124 Training_loss_avg:0.613591
Epoch:3 Step:1584 Training_loss:0.908087 Training_loss_avg:0.617940
Epoch:3 Step:1592 Training_loss:0.698436 Training_loss_avg:0.620767
Epoch:3 Step:1600 Training_loss:0.599395 Training_loss_avg:0.622570
Epoch:3 Step

52it [00:07,  6.61it/s]


Epoch:3 Step:1736 Val_loss:0.618221
Epoch:3 Step:1744 Training_loss:0.670640 Training_loss_avg:0.639345
Epoch:3 Step:1752 Training_loss:0.547954 Training_loss_avg:0.632183
Epoch:3 Step:1760 Training_loss:0.790789 Training_loss_avg:0.636097
Epoch:3 Step:1768 Training_loss:0.683734 Training_loss_avg:0.638877
Epoch:3 Step:1776 Training_loss:0.659250 Training_loss_avg:0.639680
Epoch:3 Step:1784 Training_loss:0.463772 Training_loss_avg:0.641247
Epoch:3 Step:1792 Training_loss:0.544431 Training_loss_avg:0.642840
Epoch:3 Step:1800 Training_loss:0.604102 Training_loss_avg:0.644933
Epoch:3 Step:1808 Training_loss:0.688894 Training_loss_avg:0.649754
Epoch:3 Step:1816 Training_loss:0.633041 Training_loss_avg:0.649720
Epoch:3 Step:1824 Training_loss:0.553402 Training_loss_avg:0.654037
Epoch:3 Step:1832 Training_loss:0.573664 Training_loss_avg:0.650095
Epoch:3 Step:1840 Training_loss:0.606705 Training_loss_avg:0.649075
Epoch:3 Step:1848 Training_loss:0.571676 Training_loss_avg:0.648892
Epoch:3 Step

52it [00:07,  6.61it/s]


Epoch:3 Step:1984 Val_loss:0.614052
Epoch:3 Step:1992 Training_loss:0.668944 Training_loss_avg:0.646610
Epoch:3 Step:2000 Training_loss:0.598466 Training_loss_avg:0.646591
Epoch:3 Step:2008 Training_loss:0.617817 Training_loss_avg:0.648235
Epoch:3 Step:2016 Training_loss:0.543359 Training_loss_avg:0.645628
Epoch:3 Step:2024 Training_loss:0.554963 Training_loss_avg:0.643505
Epoch:3 Step:2032 Training_loss:0.525429 Training_loss_avg:0.642456
Epoch:3 Step:2040 Training_loss:0.581494 Training_loss_avg:0.642226
Epoch:3 Step:2048 Training_loss:0.623709 Training_loss_avg:0.642826
Epoch:3 Step:2056 Training_loss:0.554683 Training_loss_avg:0.638148
Epoch:3 Step:2064 Training_loss:0.764101 Training_loss_avg:0.640817
Epoch:3 Step:2072 Training_loss:0.625244 Training_loss_avg:0.640332
Epoch:3 Step:2080 Training_loss:0.589853 Training_loss_avg:0.637972
Epoch:3 Step:2088 Training_loss:0.725912 Training_loss_avg:0.639034
Epoch:3 Step:2096 Training_loss:0.590541 Training_loss_avg:0.637195
Epoch:3 Step

52it [00:07,  6.61it/s]


Epoch:3 Step:2232 Val_loss:0.692636
Epoch:3 Step:2240 Training_loss:0.792364 Training_loss_avg:0.627802
Epoch:3 Step:2248 Training_loss:0.426079 Training_loss_avg:0.624891
Epoch:3 Step:2256 Training_loss:0.534268 Training_loss_avg:0.623490
Epoch:3 Step:2264 Training_loss:0.678583 Training_loss_avg:0.626936
Epoch:3 Step:2272 Training_loss:0.857831 Training_loss_avg:0.633134
Epoch:3 Step:2280 Training_loss:0.670521 Training_loss_avg:0.632366
Epoch:3 Step:2288 Training_loss:0.777062 Training_loss_avg:0.636844
Epoch:3 Step:2296 Training_loss:0.376634 Training_loss_avg:0.630767
Epoch:3 Step:2304 Training_loss:0.474264 Training_loss_avg:0.626217
Epoch:3 Step:2312 Training_loss:0.655642 Training_loss_avg:0.622266
Epoch:3 Step:2320 Training_loss:0.576477 Training_loss_avg:0.614485
Epoch:3 Step:2328 Training_loss:0.580311 Training_loss_avg:0.613081
Epoch:3 Step:2336 Training_loss:0.685345 Training_loss_avg:0.611674
Epoch:3 Step:2344 Training_loss:0.773377 Training_loss_avg:0.615370
Epoch:3 Step

52it [00:07,  6.61it/s]


Epoch:3 Step:2480 Val_loss:0.624163
Epoch:3 Step:2488 Training_loss:0.589680 Training_loss_avg:0.607494
Epoch:3 Step:2496 Training_loss:0.594311 Training_loss_avg:0.607570
Epoch:3 Step:2504 Training_loss:0.754189 Training_loss_avg:0.608929
Epoch:3 Step:2512 Training_loss:0.637747 Training_loss_avg:0.607928
Epoch:3 Step:2520 Training_loss:0.423723 Training_loss_avg:0.605050
Epoch:3 Step:2528 Training_loss:0.803054 Training_loss_avg:0.610180
Epoch:3 Step:2536 Training_loss:0.684290 Training_loss_avg:0.612083
Epoch:3 Step:2544 Training_loss:0.610268 Training_loss_avg:0.613548
Epoch:3 Step:2552 Training_loss:0.618863 Training_loss_avg:0.613394
Epoch:3 Step:2560 Training_loss:0.756806 Training_loss_avg:0.619543
Epoch:3 Step:2568 Training_loss:0.416507 Training_loss_avg:0.616031
Epoch:3 Step:2576 Training_loss:0.498056 Training_loss_avg:0.618173
Epoch:3 Step:2584 Training_loss:0.507552 Training_loss_avg:0.619368
Epoch:3 Step:2592 Training_loss:0.534211 Training_loss_avg:0.614148
Epoch:3 Step

52it [00:07,  6.62it/s]


Epoch:3 Step:2728 Val_loss:0.627439
Epoch:3 Step:2736 Training_loss:0.744741 Training_loss_avg:0.614884
Epoch:3 Step:2744 Training_loss:0.631606 Training_loss_avg:0.612049
Epoch:3 Step:2752 Training_loss:0.719733 Training_loss_avg:0.615035
Epoch:3 Step:2760 Training_loss:0.641620 Training_loss_avg:0.617246
Epoch:3 Step:2768 Training_loss:0.629505 Training_loss_avg:0.616497
Epoch:3 Step:2776 Training_loss:0.541967 Training_loss_avg:0.613018
Epoch:3 Step:2784 Training_loss:0.642990 Training_loss_avg:0.617057
Epoch:3 Step:2792 Training_loss:0.657565 Training_loss_avg:0.620477
Epoch:3 Step:2800 Training_loss:0.682085 Training_loss_avg:0.620329
Epoch:3 Step:2808 Training_loss:0.668891 Training_loss_avg:0.620972
Epoch:3 Step:2816 Training_loss:0.608633 Training_loss_avg:0.620048
Epoch:3 Step:2824 Training_loss:0.564005 Training_loss_avg:0.618108
Epoch:3 Step:2832 Training_loss:0.573010 Training_loss_avg:0.618375
Epoch:3 Step:2840 Training_loss:0.522260 Training_loss_avg:0.618760
Epoch:3 Step

52it [00:07,  6.62it/s]


Epoch:3 Step:2976 Val_loss:0.630033
Epoch:3 Step:2984 Training_loss:0.557218 Training_loss_avg:0.616854
Epoch:3 Step:2992 Training_loss:0.514975 Training_loss_avg:0.616470
Epoch:3 Step:3000 Training_loss:0.445518 Training_loss_avg:0.611340
Epoch:3 Step:3008 Training_loss:0.776866 Training_loss_avg:0.619487
Epoch:3 Step:3016 Training_loss:0.501164 Training_loss_avg:0.619882
Epoch:3 Step:3024 Training_loss:0.425025 Training_loss_avg:0.615057
Epoch:3 Step:3032 Training_loss:0.214722 Training_loss_avg:0.606678
Epoch:3 Step:3040 Training_loss:0.514196 Training_loss_avg:0.605558
Epoch:3 Step:3048 Training_loss:0.280396 Training_loss_avg:0.602133
Epoch:3 Step:3056 Training_loss:0.393986 Training_loss_avg:0.594709
Epoch:3 Step:3064 Training_loss:0.718317 Training_loss_avg:0.600515
Epoch:3 Step:3072 Training_loss:0.562419 Training_loss_avg:0.595337
Epoch:3 Step:3080 Training_loss:0.666346 Training_loss_avg:0.600238
Epoch:3 Step:3088 Training_loss:0.249366 Training_loss_avg:0.595838
Epoch:3 Step

52it [00:07,  6.61it/s]


Epoch:3 Step:3224 Val_loss:0.624533
Epoch:3 Step:3232 Training_loss:0.703691 Training_loss_avg:0.586989
Epoch:3 Step:3240 Training_loss:0.643112 Training_loss_avg:0.589406
Epoch:3 Step:3248 Training_loss:0.633607 Training_loss_avg:0.593268
Epoch:3 Step:3256 Training_loss:0.563443 Training_loss_avg:0.589703
Epoch:3 Step:3264 Training_loss:0.694125 Training_loss_avg:0.590864
Epoch:3 Step:3272 Training_loss:0.757109 Training_loss_avg:0.592751
Epoch:3 Step:3280 Training_loss:0.371764 Training_loss_avg:0.585478
Epoch:3 Step:3288 Training_loss:0.384698 Training_loss_avg:0.580818
Epoch:3 Step:3296 Training_loss:0.648731 Training_loss_avg:0.583963
Epoch:3 Step:3304 Training_loss:0.372692 Training_loss_avg:0.579977
Epoch:3 Step:3312 Training_loss:0.795684 Training_loss_avg:0.583964
Epoch:3 Step:3320 Training_loss:0.864752 Training_loss_avg:0.590207
Epoch:3 Step:3328 Training_loss:0.579746 Training_loss_avg:0.587863
Epoch:4 Step:0 Training_loss:0.710671 Training_loss_avg:0.592980
Validating:


52it [00:07,  6.62it/s]


Epoch:4 Step:0 Val_loss:0.652501
Epoch:4 Step:8 Training_loss:0.699990 Training_loss_avg:0.595120
Epoch:4 Step:16 Training_loss:0.435974 Training_loss_avg:0.596286
Epoch:4 Step:24 Training_loss:0.629764 Training_loss_avg:0.596487
Epoch:4 Step:32 Training_loss:0.632353 Training_loss_avg:0.598198
Epoch:4 Step:40 Training_loss:0.478304 Training_loss_avg:0.588218
Epoch:4 Step:48 Training_loss:0.544334 Training_loss_avg:0.587960
Epoch:4 Step:56 Training_loss:0.430421 Training_loss_avg:0.586269
Epoch:4 Step:64 Training_loss:0.629515 Training_loss_avg:0.589949
Epoch:4 Step:72 Training_loss:0.385618 Training_loss_avg:0.582124
Epoch:4 Step:80 Training_loss:0.933889 Training_loss_avg:0.590779
Epoch:4 Step:88 Training_loss:0.499238 Training_loss_avg:0.592263
Epoch:4 Step:96 Training_loss:0.645732 Training_loss_avg:0.600883
Epoch:4 Step:104 Training_loss:0.347130 Training_loss_avg:0.597542
Epoch:4 Step:112 Training_loss:0.695026 Training_loss_avg:0.605835
Epoch:4 Step:120 Training_loss:0.514325 Tr

52it [00:07,  6.63it/s]


Epoch:4 Step:248 Val_loss:0.620075
Epoch:4 Step:256 Training_loss:0.521441 Training_loss_avg:0.615959
Epoch:4 Step:264 Training_loss:0.536984 Training_loss_avg:0.613983
Epoch:4 Step:272 Training_loss:0.563318 Training_loss_avg:0.608208
Epoch:4 Step:280 Training_loss:0.604551 Training_loss_avg:0.608516
Epoch:4 Step:288 Training_loss:0.587109 Training_loss_avg:0.606135
Epoch:4 Step:296 Training_loss:0.637489 Training_loss_avg:0.604811
Epoch:4 Step:304 Training_loss:0.683446 Training_loss_avg:0.605618
Epoch:4 Step:312 Training_loss:0.496128 Training_loss_avg:0.602868
Epoch:4 Step:320 Training_loss:0.737355 Training_loss_avg:0.606347
Epoch:4 Step:328 Training_loss:0.479266 Training_loss_avg:0.602050
Epoch:4 Step:336 Training_loss:0.459586 Training_loss_avg:0.596099
Epoch:4 Step:344 Training_loss:0.524209 Training_loss_avg:0.599148
Epoch:4 Step:352 Training_loss:0.665820 Training_loss_avg:0.604770
Epoch:4 Step:360 Training_loss:0.720580 Training_loss_avg:0.606207
Epoch:4 Step:368 Training_l

52it [00:07,  6.63it/s]


Epoch:4 Step:496 Val_loss:0.700833
Epoch:4 Step:504 Training_loss:0.788863 Training_loss_avg:0.595807
Epoch:4 Step:512 Training_loss:0.523594 Training_loss_avg:0.592378
Epoch:4 Step:520 Training_loss:0.485955 Training_loss_avg:0.591811
Epoch:4 Step:528 Training_loss:0.507280 Training_loss_avg:0.590348
Epoch:4 Step:536 Training_loss:0.499534 Training_loss_avg:0.587947
Epoch:4 Step:544 Training_loss:0.579888 Training_loss_avg:0.584676
Epoch:4 Step:552 Training_loss:0.557749 Training_loss_avg:0.585305
Epoch:4 Step:560 Training_loss:0.392242 Training_loss_avg:0.582078
Epoch:4 Step:568 Training_loss:0.424952 Training_loss_avg:0.580796
Epoch:4 Step:576 Training_loss:0.498030 Training_loss_avg:0.575121
Epoch:4 Step:584 Training_loss:0.617182 Training_loss_avg:0.571769
Epoch:4 Step:592 Training_loss:0.530521 Training_loss_avg:0.570598
Epoch:4 Step:600 Training_loss:0.646320 Training_loss_avg:0.568068
Epoch:4 Step:608 Training_loss:0.568400 Training_loss_avg:0.565275
Epoch:4 Step:616 Training_l

52it [00:07,  6.63it/s]


Epoch:4 Step:744 Val_loss:0.676244
Epoch:4 Step:752 Training_loss:0.785776 Training_loss_avg:0.553255
Epoch:4 Step:760 Training_loss:0.427320 Training_loss_avg:0.547389
Epoch:4 Step:768 Training_loss:0.325910 Training_loss_avg:0.542251
Epoch:4 Step:776 Training_loss:0.626356 Training_loss_avg:0.540494
Epoch:4 Step:784 Training_loss:0.401271 Training_loss_avg:0.536030
Epoch:4 Step:792 Training_loss:0.611612 Training_loss_avg:0.537365
Epoch:4 Step:800 Training_loss:0.832799 Training_loss_avg:0.545505
Epoch:4 Step:808 Training_loss:0.559747 Training_loss_avg:0.544421
Epoch:4 Step:816 Training_loss:0.670203 Training_loss_avg:0.547482
Epoch:4 Step:824 Training_loss:0.632146 Training_loss_avg:0.554558
Epoch:4 Step:832 Training_loss:0.449919 Training_loss_avg:0.553765
Epoch:4 Step:840 Training_loss:0.519696 Training_loss_avg:0.553379
Epoch:4 Step:848 Training_loss:0.518035 Training_loss_avg:0.555439
Epoch:4 Step:856 Training_loss:0.364820 Training_loss_avg:0.553255
Epoch:4 Step:864 Training_l

52it [00:07,  6.63it/s]


Epoch:4 Step:992 Val_loss:0.654029
Epoch:4 Step:1000 Training_loss:0.703476 Training_loss_avg:0.560555
Epoch:4 Step:1008 Training_loss:0.492424 Training_loss_avg:0.559036
Epoch:4 Step:1016 Training_loss:0.646942 Training_loss_avg:0.561002
Epoch:4 Step:1024 Training_loss:0.513918 Training_loss_avg:0.561246
Epoch:4 Step:1032 Training_loss:0.553905 Training_loss_avg:0.562912
Epoch:4 Step:1040 Training_loss:0.447491 Training_loss_avg:0.557945
Epoch:4 Step:1048 Training_loss:0.647548 Training_loss_avg:0.558218
Epoch:4 Step:1056 Training_loss:0.491029 Training_loss_avg:0.558078
Epoch:4 Step:1064 Training_loss:0.393877 Training_loss_avg:0.559065
Epoch:4 Step:1072 Training_loss:0.641321 Training_loss_avg:0.560732
Epoch:4 Step:1080 Training_loss:0.474757 Training_loss_avg:0.558572
Epoch:4 Step:1088 Training_loss:0.461627 Training_loss_avg:0.551851
Epoch:4 Step:1096 Training_loss:0.859175 Training_loss_avg:0.558740
Epoch:4 Step:1104 Training_loss:0.471893 Training_loss_avg:0.557062
Epoch:4 Step:

52it [00:07,  6.63it/s]


Epoch:4 Step:1240 Val_loss:0.659388
Epoch:4 Step:1248 Training_loss:0.586523 Training_loss_avg:0.568876
Epoch:4 Step:1256 Training_loss:0.432234 Training_loss_avg:0.570224
Epoch:4 Step:1264 Training_loss:0.464938 Training_loss_avg:0.568798
Epoch:4 Step:1272 Training_loss:0.592369 Training_loss_avg:0.560854
Epoch:4 Step:1280 Training_loss:0.438610 Training_loss_avg:0.553769
Epoch:4 Step:1288 Training_loss:0.622775 Training_loss_avg:0.553022
Epoch:4 Step:1296 Training_loss:0.506860 Training_loss_avg:0.548774
Epoch:4 Step:1304 Training_loss:0.690097 Training_loss_avg:0.551514
Epoch:4 Step:1312 Training_loss:0.574910 Training_loss_avg:0.554059
Epoch:4 Step:1320 Training_loss:0.346964 Training_loss_avg:0.552469
Epoch:4 Step:1328 Training_loss:0.675032 Training_loss_avg:0.553930
Epoch:4 Step:1336 Training_loss:0.495284 Training_loss_avg:0.558506
Epoch:4 Step:1344 Training_loss:0.606194 Training_loss_avg:0.557821
Epoch:4 Step:1352 Training_loss:0.616358 Training_loss_avg:0.562309
Epoch:4 Step

52it [00:07,  6.63it/s]


Epoch:4 Step:1488 Val_loss:0.633952
Epoch:4 Step:1496 Training_loss:0.585069 Training_loss_avg:0.587776
Epoch:4 Step:1504 Training_loss:0.432777 Training_loss_avg:0.586994
Epoch:4 Step:1512 Training_loss:0.478319 Training_loss_avg:0.585362
Epoch:4 Step:1520 Training_loss:0.434014 Training_loss_avg:0.581409
Epoch:4 Step:1528 Training_loss:0.752851 Training_loss_avg:0.587076
Epoch:4 Step:1536 Training_loss:0.637349 Training_loss_avg:0.591554
Epoch:4 Step:1544 Training_loss:0.810382 Training_loss_avg:0.598855
Epoch:4 Step:1552 Training_loss:0.700440 Training_loss_avg:0.599356
Epoch:4 Step:1560 Training_loss:0.555880 Training_loss_avg:0.596825
Epoch:4 Step:1568 Training_loss:0.522789 Training_loss_avg:0.593083
Epoch:4 Step:1576 Training_loss:0.442596 Training_loss_avg:0.591653
Epoch:4 Step:1584 Training_loss:0.494116 Training_loss_avg:0.589359
Epoch:4 Step:1592 Training_loss:0.622747 Training_loss_avg:0.593276
Epoch:4 Step:1600 Training_loss:0.451954 Training_loss_avg:0.584620
Epoch:4 Step

52it [00:07,  6.63it/s]


Epoch:4 Step:1736 Val_loss:0.627100
Epoch:4 Step:1744 Training_loss:0.684180 Training_loss_avg:0.614320
Epoch:4 Step:1752 Training_loss:0.463427 Training_loss_avg:0.611261
Epoch:4 Step:1760 Training_loss:0.493437 Training_loss_avg:0.611023
Epoch:4 Step:1768 Training_loss:0.603943 Training_loss_avg:0.608209
Epoch:4 Step:1776 Training_loss:0.697447 Training_loss_avg:0.603786
Epoch:4 Step:1784 Training_loss:0.674115 Training_loss_avg:0.601930
Epoch:4 Step:1792 Training_loss:0.504650 Training_loss_avg:0.598572
Epoch:4 Step:1800 Training_loss:0.697791 Training_loss_avg:0.603427
Epoch:4 Step:1808 Training_loss:0.778852 Training_loss_avg:0.612456
Epoch:4 Step:1816 Training_loss:0.635131 Training_loss_avg:0.616895
Epoch:4 Step:1824 Training_loss:0.643612 Training_loss_avg:0.619696
Epoch:4 Step:1832 Training_loss:0.544830 Training_loss_avg:0.619458
Epoch:4 Step:1840 Training_loss:0.724436 Training_loss_avg:0.622876
Epoch:4 Step:1848 Training_loss:0.744581 Training_loss_avg:0.622680
Epoch:4 Step

52it [00:07,  6.63it/s]


Epoch:4 Step:1984 Val_loss:0.686756
Epoch:4 Step:1992 Training_loss:0.533561 Training_loss_avg:0.618459
Epoch:4 Step:2000 Training_loss:0.524531 Training_loss_avg:0.619910
Epoch:4 Step:2008 Training_loss:0.649294 Training_loss_avg:0.621390
Epoch:4 Step:2016 Training_loss:0.414484 Training_loss_avg:0.618263
Epoch:4 Step:2024 Training_loss:0.511969 Training_loss_avg:0.618492
Epoch:4 Step:2032 Training_loss:0.701238 Training_loss_avg:0.622719
Epoch:4 Step:2040 Training_loss:0.362865 Training_loss_avg:0.619318
Epoch:4 Step:2048 Training_loss:0.727618 Training_loss_avg:0.619570
Epoch:4 Step:2056 Training_loss:0.673527 Training_loss_avg:0.619056
Epoch:4 Step:2064 Training_loss:0.416506 Training_loss_avg:0.615836
Epoch:4 Step:2072 Training_loss:0.491908 Training_loss_avg:0.612486
Epoch:4 Step:2080 Training_loss:0.542846 Training_loss_avg:0.614359
Epoch:4 Step:2088 Training_loss:0.664954 Training_loss_avg:0.618602
Epoch:4 Step:2096 Training_loss:0.661284 Training_loss_avg:0.620226
Epoch:4 Step

52it [00:07,  6.63it/s]


Epoch:4 Step:2232 Val_loss:0.637083
Epoch:4 Step:2240 Training_loss:0.500622 Training_loss_avg:0.587490
Epoch:4 Step:2248 Training_loss:0.763788 Training_loss_avg:0.587874
Epoch:4 Step:2256 Training_loss:0.815932 Training_loss_avg:0.593991
Epoch:4 Step:2264 Training_loss:0.423712 Training_loss_avg:0.590832
Epoch:4 Step:2272 Training_loss:0.841731 Training_loss_avg:0.596532
Epoch:4 Step:2280 Training_loss:0.443067 Training_loss_avg:0.595681
Epoch:4 Step:2288 Training_loss:0.628397 Training_loss_avg:0.597655
Epoch:4 Step:2296 Training_loss:0.719637 Training_loss_avg:0.601259
Epoch:4 Step:2304 Training_loss:0.729586 Training_loss_avg:0.607359
Epoch:4 Step:2312 Training_loss:0.492653 Training_loss_avg:0.609344
Epoch:4 Step:2320 Training_loss:0.531197 Training_loss_avg:0.604650
Epoch:4 Step:2328 Training_loss:0.635971 Training_loss_avg:0.611371
Epoch:4 Step:2336 Training_loss:0.719300 Training_loss_avg:0.610016
Epoch:4 Step:2344 Training_loss:0.501268 Training_loss_avg:0.607239
Epoch:4 Step

52it [00:07,  6.63it/s]


Epoch:4 Step:2480 Val_loss:0.659978
Epoch:4 Step:2488 Training_loss:0.707485 Training_loss_avg:0.590390
Epoch:4 Step:2496 Training_loss:0.501055 Training_loss_avg:0.587186
Epoch:4 Step:2504 Training_loss:0.563974 Training_loss_avg:0.586754
Epoch:4 Step:2512 Training_loss:0.451216 Training_loss_avg:0.585822
Epoch:4 Step:2520 Training_loss:0.558946 Training_loss_avg:0.585301
Epoch:4 Step:2528 Training_loss:0.630232 Training_loss_avg:0.587932
Epoch:4 Step:2536 Training_loss:0.774906 Training_loss_avg:0.591281
Epoch:4 Step:2544 Training_loss:0.795846 Training_loss_avg:0.592018
Epoch:4 Step:2552 Training_loss:0.447924 Training_loss_avg:0.588148
Epoch:4 Step:2560 Training_loss:0.530831 Training_loss_avg:0.583669
Epoch:4 Step:2568 Training_loss:0.610176 Training_loss_avg:0.585683
Epoch:4 Step:2576 Training_loss:0.574389 Training_loss_avg:0.584682
Epoch:4 Step:2584 Training_loss:0.599079 Training_loss_avg:0.587546
Epoch:4 Step:2592 Training_loss:0.402971 Training_loss_avg:0.584027
Epoch:4 Step

52it [00:07,  6.63it/s]


Epoch:4 Step:2728 Val_loss:0.671737
Epoch:4 Step:2736 Training_loss:0.482876 Training_loss_avg:0.558011
Epoch:4 Step:2744 Training_loss:0.458865 Training_loss_avg:0.557163
Epoch:4 Step:2752 Training_loss:0.535264 Training_loss_avg:0.556717
Epoch:4 Step:2760 Training_loss:0.761555 Training_loss_avg:0.562984
Epoch:4 Step:2768 Training_loss:0.608916 Training_loss_avg:0.565271
Epoch:4 Step:2776 Training_loss:0.498743 Training_loss_avg:0.562025
Epoch:4 Step:2784 Training_loss:0.578489 Training_loss_avg:0.562496
Epoch:4 Step:2792 Training_loss:0.412285 Training_loss_avg:0.561376
Epoch:4 Step:2800 Training_loss:0.898313 Training_loss_avg:0.568358
Epoch:4 Step:2808 Training_loss:0.666398 Training_loss_avg:0.570529
Epoch:4 Step:2816 Training_loss:0.670852 Training_loss_avg:0.568040
Epoch:4 Step:2824 Training_loss:0.930178 Training_loss_avg:0.576985
Epoch:4 Step:2832 Training_loss:0.567813 Training_loss_avg:0.577362
Epoch:4 Step:2840 Training_loss:0.648222 Training_loss_avg:0.578069
Epoch:4 Step

52it [00:07,  6.63it/s]


Epoch:4 Step:2976 Val_loss:0.629802
Epoch:4 Step:2984 Training_loss:0.619787 Training_loss_avg:0.584838
Epoch:4 Step:2992 Training_loss:0.647947 Training_loss_avg:0.589737
Epoch:4 Step:3000 Training_loss:0.617688 Training_loss_avg:0.591133
Epoch:4 Step:3008 Training_loss:0.601061 Training_loss_avg:0.594489
Epoch:4 Step:3016 Training_loss:0.583319 Training_loss_avg:0.598138
Epoch:4 Step:3024 Training_loss:0.683855 Training_loss_avg:0.603098
Epoch:4 Step:3032 Training_loss:0.393795 Training_loss_avg:0.596150
Epoch:4 Step:3040 Training_loss:0.582735 Training_loss_avg:0.597588
Epoch:4 Step:3048 Training_loss:0.732780 Training_loss_avg:0.601535
Epoch:4 Step:3056 Training_loss:0.665832 Training_loss_avg:0.605247
Epoch:4 Step:3064 Training_loss:0.712657 Training_loss_avg:0.605061
Epoch:4 Step:3072 Training_loss:0.568306 Training_loss_avg:0.605575
Epoch:4 Step:3080 Training_loss:0.588294 Training_loss_avg:0.605305
Epoch:4 Step:3088 Training_loss:0.610042 Training_loss_avg:0.609697
Epoch:4 Step

52it [00:07,  6.63it/s]


Epoch:4 Step:3224 Val_loss:0.632515
Epoch:4 Step:3232 Training_loss:0.455746 Training_loss_avg:0.586417
Epoch:4 Step:3240 Training_loss:0.642683 Training_loss_avg:0.586306
Epoch:4 Step:3248 Training_loss:0.489930 Training_loss_avg:0.583985
Epoch:4 Step:3256 Training_loss:0.541473 Training_loss_avg:0.583974
Epoch:4 Step:3264 Training_loss:0.581634 Training_loss_avg:0.584579
Epoch:4 Step:3272 Training_loss:0.430257 Training_loss_avg:0.580067
Epoch:4 Step:3280 Training_loss:0.346285 Training_loss_avg:0.577703
Epoch:4 Step:3288 Training_loss:0.764727 Training_loss_avg:0.581570
Epoch:4 Step:3296 Training_loss:0.914209 Training_loss_avg:0.584773
Epoch:4 Step:3304 Training_loss:0.865335 Training_loss_avg:0.590046
Epoch:4 Step:3312 Training_loss:0.576805 Training_loss_avg:0.587591
Epoch:4 Step:3320 Training_loss:0.410078 Training_loss_avg:0.578117
Epoch:4 Step:3328 Training_loss:0.713447 Training_loss_avg:0.585946
Epoch:5 Step:0 Training_loss:0.613108 Training_loss_avg:0.583974
Validating:


52it [00:07,  6.63it/s]


Epoch:5 Step:0 Val_loss:0.633493
Epoch:5 Step:8 Training_loss:0.563671 Training_loss_avg:0.582899
Epoch:5 Step:16 Training_loss:0.464798 Training_loss_avg:0.579010
Epoch:5 Step:24 Training_loss:0.634023 Training_loss_avg:0.578586
Epoch:5 Step:32 Training_loss:0.512837 Training_loss_avg:0.578071
Epoch:5 Step:40 Training_loss:0.456777 Training_loss_avg:0.579336
Epoch:5 Step:48 Training_loss:0.614971 Training_loss_avg:0.579240
Epoch:5 Step:56 Training_loss:0.520986 Training_loss_avg:0.576701
Epoch:5 Step:64 Training_loss:0.626555 Training_loss_avg:0.576878
Epoch:5 Step:72 Training_loss:0.512419 Training_loss_avg:0.575105
Epoch:5 Step:80 Training_loss:0.671884 Training_loss_avg:0.576876
Epoch:5 Step:88 Training_loss:0.615212 Training_loss_avg:0.575504
Epoch:5 Step:96 Training_loss:0.493551 Training_loss_avg:0.577499
Epoch:5 Step:104 Training_loss:0.480153 Training_loss_avg:0.575447
Epoch:5 Step:112 Training_loss:0.577207 Training_loss_avg:0.572336
Epoch:5 Step:120 Training_loss:0.686789 Tr

52it [00:07,  6.63it/s]


Epoch:5 Step:248 Val_loss:0.688046
Epoch:5 Step:256 Training_loss:0.300429 Training_loss_avg:0.544199
Epoch:5 Step:264 Training_loss:0.676393 Training_loss_avg:0.545396
Epoch:5 Step:272 Training_loss:0.790633 Training_loss_avg:0.551681
Epoch:5 Step:280 Training_loss:0.295982 Training_loss_avg:0.550535
Epoch:5 Step:288 Training_loss:0.692167 Training_loss_avg:0.553303
Epoch:5 Step:296 Training_loss:0.874591 Training_loss_avg:0.561680
Epoch:5 Step:304 Training_loss:0.544588 Training_loss_avg:0.559718
Epoch:5 Step:312 Training_loss:0.319124 Training_loss_avg:0.556302
Epoch:5 Step:320 Training_loss:0.608296 Training_loss_avg:0.557638
Epoch:5 Step:328 Training_loss:0.459320 Training_loss_avg:0.555192
Epoch:5 Step:336 Training_loss:0.442696 Training_loss_avg:0.555441
Epoch:5 Step:344 Training_loss:0.431008 Training_loss_avg:0.557135
Epoch:5 Step:352 Training_loss:0.399836 Training_loss_avg:0.549837
Epoch:5 Step:360 Training_loss:0.663662 Training_loss_avg:0.544826
Epoch:5 Step:368 Training_l

52it [00:07,  6.61it/s]


Epoch:5 Step:496 Val_loss:0.675427
Epoch:5 Step:504 Training_loss:0.595048 Training_loss_avg:0.518291
Epoch:5 Step:512 Training_loss:0.709336 Training_loss_avg:0.520934
Epoch:5 Step:520 Training_loss:0.702263 Training_loss_avg:0.521243
Epoch:5 Step:528 Training_loss:0.377621 Training_loss_avg:0.515420
Epoch:5 Step:536 Training_loss:0.472664 Training_loss_avg:0.509657
Epoch:5 Step:544 Training_loss:0.809668 Training_loss_avg:0.512178
Epoch:5 Step:552 Training_loss:0.427716 Training_loss_avg:0.512273
Epoch:5 Step:560 Training_loss:0.554506 Training_loss_avg:0.516657
Epoch:5 Step:568 Training_loss:0.334791 Training_loss_avg:0.514262
Epoch:5 Step:576 Training_loss:0.943804 Training_loss_avg:0.522317
Epoch:5 Step:584 Training_loss:0.468050 Training_loss_avg:0.520235
Epoch:5 Step:592 Training_loss:0.672623 Training_loss_avg:0.524029
Epoch:5 Step:600 Training_loss:0.519912 Training_loss_avg:0.524087
Epoch:5 Step:608 Training_loss:0.428337 Training_loss_avg:0.522571
Epoch:5 Step:616 Training_l

52it [00:07,  6.63it/s]


Epoch:5 Step:744 Val_loss:0.647006
Epoch:5 Step:752 Training_loss:0.640361 Training_loss_avg:0.552697
Epoch:5 Step:760 Training_loss:0.652842 Training_loss_avg:0.552480
Epoch:5 Step:768 Training_loss:0.409583 Training_loss_avg:0.550321
Epoch:5 Step:776 Training_loss:0.542101 Training_loss_avg:0.553589
Epoch:5 Step:784 Training_loss:0.629976 Training_loss_avg:0.556005
Epoch:5 Step:792 Training_loss:0.615134 Training_loss_avg:0.559781
Epoch:5 Step:800 Training_loss:0.588307 Training_loss_avg:0.560390
Epoch:5 Step:808 Training_loss:0.543427 Training_loss_avg:0.562796
Epoch:5 Step:816 Training_loss:0.555286 Training_loss_avg:0.560101
Epoch:5 Step:824 Training_loss:0.407327 Training_loss_avg:0.555914
Epoch:5 Step:832 Training_loss:0.625528 Training_loss_avg:0.557015
Epoch:5 Step:840 Training_loss:0.740917 Training_loss_avg:0.562231
Epoch:5 Step:848 Training_loss:0.306336 Training_loss_avg:0.555924
Epoch:5 Step:856 Training_loss:0.514368 Training_loss_avg:0.557676
Epoch:5 Step:864 Training_l

52it [00:07,  6.63it/s]


Epoch:5 Step:992 Val_loss:0.657575
Epoch:5 Step:1000 Training_loss:0.532599 Training_loss_avg:0.561413
Epoch:5 Step:1008 Training_loss:0.672949 Training_loss_avg:0.566305
Epoch:5 Step:1016 Training_loss:0.658940 Training_loss_avg:0.566165
Epoch:5 Step:1024 Training_loss:0.725488 Training_loss_avg:0.565747
Epoch:5 Step:1032 Training_loss:0.517617 Training_loss_avg:0.563532
Epoch:5 Step:1040 Training_loss:0.404186 Training_loss_avg:0.560266
Epoch:5 Step:1048 Training_loss:0.602841 Training_loss_avg:0.559920
Epoch:5 Step:1056 Training_loss:0.648096 Training_loss_avg:0.561915
Epoch:5 Step:1064 Training_loss:0.487995 Training_loss_avg:0.564337
Epoch:5 Step:1072 Training_loss:0.414323 Training_loss_avg:0.559959
Epoch:5 Step:1080 Training_loss:0.387108 Training_loss_avg:0.551387
Epoch:5 Step:1088 Training_loss:0.401913 Training_loss_avg:0.548257
Epoch:5 Step:1096 Training_loss:0.437562 Training_loss_avg:0.546863
Epoch:5 Step:1104 Training_loss:0.849746 Training_loss_avg:0.556746
Epoch:5 Step:

52it [00:07,  6.63it/s]


Epoch:5 Step:1240 Val_loss:0.659605
Epoch:5 Step:1248 Training_loss:0.567408 Training_loss_avg:0.554637
Epoch:5 Step:1256 Training_loss:0.425973 Training_loss_avg:0.552869
Epoch:5 Step:1264 Training_loss:0.758209 Training_loss_avg:0.556250
Epoch:5 Step:1272 Training_loss:0.520313 Training_loss_avg:0.553312
Epoch:5 Step:1280 Training_loss:0.712814 Training_loss_avg:0.556895
Epoch:5 Step:1288 Training_loss:0.375612 Training_loss_avg:0.552353
Epoch:5 Step:1296 Training_loss:0.566443 Training_loss_avg:0.548714
Epoch:5 Step:1304 Training_loss:0.460234 Training_loss_avg:0.548251
Epoch:5 Step:1312 Training_loss:0.595722 Training_loss_avg:0.553621
Epoch:5 Step:1320 Training_loss:0.447886 Training_loss_avg:0.550083
Epoch:5 Step:1328 Training_loss:0.654650 Training_loss_avg:0.549498
Epoch:5 Step:1336 Training_loss:1.197690 Training_loss_avg:0.560571
Epoch:5 Step:1344 Training_loss:0.389819 Training_loss_avg:0.558697
Epoch:5 Step:1352 Training_loss:0.956049 Training_loss_avg:0.570371
Epoch:5 Step

52it [00:07,  6.63it/s]


Epoch:5 Step:1488 Val_loss:0.642942
Epoch:5 Step:1496 Training_loss:0.492122 Training_loss_avg:0.572887
Epoch:5 Step:1504 Training_loss:0.370703 Training_loss_avg:0.563306
Epoch:5 Step:1512 Training_loss:0.383141 Training_loss_avg:0.561088
Epoch:5 Step:1520 Training_loss:0.534426 Training_loss_avg:0.560156
Epoch:5 Step:1528 Training_loss:0.494067 Training_loss_avg:0.550159
Epoch:5 Step:1536 Training_loss:0.367357 Training_loss_avg:0.547844
Epoch:5 Step:1544 Training_loss:0.632700 Training_loss_avg:0.542303
Epoch:5 Step:1552 Training_loss:0.659395 Training_loss_avg:0.548562
Epoch:5 Step:1560 Training_loss:0.384967 Training_loss_avg:0.548929
Epoch:5 Step:1568 Training_loss:0.575720 Training_loss_avg:0.549504
Epoch:5 Step:1576 Training_loss:0.542930 Training_loss_avg:0.550091
Epoch:5 Step:1584 Training_loss:0.851297 Training_loss_avg:0.561487
Epoch:5 Step:1592 Training_loss:0.629546 Training_loss_avg:0.562047
Epoch:5 Step:1600 Training_loss:0.497522 Training_loss_avg:0.561056
Epoch:5 Step

52it [00:07,  6.62it/s]


Epoch:5 Step:1736 Val_loss:0.652074
Epoch:5 Step:1744 Training_loss:0.544443 Training_loss_avg:0.528522
Epoch:5 Step:1752 Training_loss:0.473864 Training_loss_avg:0.518878
Epoch:5 Step:1760 Training_loss:0.657488 Training_loss_avg:0.527157
Epoch:5 Step:1768 Training_loss:0.513998 Training_loss_avg:0.524359
Epoch:5 Step:1776 Training_loss:0.614260 Training_loss_avg:0.524367
Epoch:5 Step:1784 Training_loss:0.377159 Training_loss_avg:0.520958
Epoch:5 Step:1792 Training_loss:0.465018 Training_loss_avg:0.514990
Epoch:5 Step:1800 Training_loss:0.422739 Training_loss_avg:0.514791
Epoch:5 Step:1808 Training_loss:0.432433 Training_loss_avg:0.511881
Epoch:5 Step:1816 Training_loss:0.520030 Training_loss_avg:0.513737
Epoch:5 Step:1824 Training_loss:0.792637 Training_loss_avg:0.516371
Epoch:5 Step:1832 Training_loss:0.596401 Training_loss_avg:0.517171
Epoch:5 Step:1840 Training_loss:0.385125 Training_loss_avg:0.513759
Epoch:5 Step:1848 Training_loss:0.693939 Training_loss_avg:0.519668
Epoch:5 Step

52it [00:07,  6.61it/s]


Epoch:5 Step:1984 Val_loss:0.693959
Epoch:5 Step:1992 Training_loss:0.629403 Training_loss_avg:0.525939
Epoch:5 Step:2000 Training_loss:0.701780 Training_loss_avg:0.530025
Epoch:5 Step:2008 Training_loss:0.497963 Training_loss_avg:0.527492
Epoch:5 Step:2016 Training_loss:0.455437 Training_loss_avg:0.525179
Epoch:5 Step:2024 Training_loss:0.614831 Training_loss_avg:0.526367
Epoch:5 Step:2032 Training_loss:0.556306 Training_loss_avg:0.530984
Epoch:5 Step:2040 Training_loss:0.470704 Training_loss_avg:0.531303
Epoch:5 Step:2048 Training_loss:0.502494 Training_loss_avg:0.535824
Epoch:5 Step:2056 Training_loss:0.196733 Training_loss_avg:0.534394
Epoch:5 Step:2064 Training_loss:0.446961 Training_loss_avg:0.527723
Epoch:5 Step:2072 Training_loss:0.510619 Training_loss_avg:0.529890
Epoch:5 Step:2080 Training_loss:0.708302 Training_loss_avg:0.535416
Epoch:5 Step:2088 Training_loss:0.559417 Training_loss_avg:0.535038
Epoch:5 Step:2096 Training_loss:0.484567 Training_loss_avg:0.535348
Epoch:5 Step

52it [00:07,  6.62it/s]


Epoch:5 Step:2232 Val_loss:0.663883
Epoch:5 Step:2240 Training_loss:0.393174 Training_loss_avg:0.528147
Epoch:5 Step:2248 Training_loss:0.437998 Training_loss_avg:0.523029
Epoch:5 Step:2256 Training_loss:0.376690 Training_loss_avg:0.514327
Epoch:5 Step:2264 Training_loss:0.537149 Training_loss_avg:0.513711
Epoch:5 Step:2272 Training_loss:0.592359 Training_loss_avg:0.516707
Epoch:5 Step:2280 Training_loss:0.911667 Training_loss_avg:0.527583
Epoch:5 Step:2288 Training_loss:0.369317 Training_loss_avg:0.528757
Epoch:5 Step:2296 Training_loss:0.950330 Training_loss_avg:0.534341
Epoch:5 Step:2304 Training_loss:0.520053 Training_loss_avg:0.536465
Epoch:5 Step:2312 Training_loss:0.525284 Training_loss_avg:0.536142
Epoch:5 Step:2320 Training_loss:0.299472 Training_loss_avg:0.526596
Epoch:5 Step:2328 Training_loss:0.486653 Training_loss_avg:0.522664
Epoch:5 Step:2336 Training_loss:0.604526 Training_loss_avg:0.523660
Epoch:5 Step:2344 Training_loss:0.540473 Training_loss_avg:0.522040
Epoch:5 Step

52it [00:07,  6.61it/s]


Epoch:5 Step:2480 Val_loss:0.679984
Epoch:5 Step:2488 Training_loss:0.397965 Training_loss_avg:0.537156
Epoch:5 Step:2496 Training_loss:0.720697 Training_loss_avg:0.541879
Epoch:5 Step:2504 Training_loss:0.441947 Training_loss_avg:0.541157
Epoch:5 Step:2512 Training_loss:0.464858 Training_loss_avg:0.540315
Epoch:5 Step:2520 Training_loss:0.620565 Training_loss_avg:0.536147
Epoch:5 Step:2528 Training_loss:0.616794 Training_loss_avg:0.538606
Epoch:5 Step:2536 Training_loss:0.640645 Training_loss_avg:0.540142
Epoch:5 Step:2544 Training_loss:0.493188 Training_loss_avg:0.538942
Epoch:5 Step:2552 Training_loss:0.629665 Training_loss_avg:0.537658
Epoch:5 Step:2560 Training_loss:0.515472 Training_loss_avg:0.535343
Epoch:5 Step:2568 Training_loss:0.531763 Training_loss_avg:0.541181
Epoch:5 Step:2576 Training_loss:0.596137 Training_loss_avg:0.544656
Epoch:5 Step:2584 Training_loss:0.633322 Training_loss_avg:0.547376
Epoch:5 Step:2592 Training_loss:0.394937 Training_loss_avg:0.541572
Epoch:5 Step

52it [00:07,  6.61it/s]


Epoch:5 Step:2728 Val_loss:0.679581
Epoch:5 Step:2736 Training_loss:0.299845 Training_loss_avg:0.522961
Epoch:5 Step:2744 Training_loss:0.501516 Training_loss_avg:0.522182
Epoch:5 Step:2752 Training_loss:0.721239 Training_loss_avg:0.524955
Epoch:5 Step:2760 Training_loss:0.508389 Training_loss_avg:0.521019
Epoch:5 Step:2768 Training_loss:0.581670 Training_loss_avg:0.522252
Epoch:5 Step:2776 Training_loss:0.797837 Training_loss_avg:0.526486
Epoch:5 Step:2784 Training_loss:0.645421 Training_loss_avg:0.531018
Epoch:5 Step:2792 Training_loss:0.539620 Training_loss_avg:0.526633
Epoch:5 Step:2800 Training_loss:0.155115 Training_loss_avg:0.516996
Epoch:5 Step:2808 Training_loss:0.596582 Training_loss_avg:0.517016
Epoch:5 Step:2816 Training_loss:0.430875 Training_loss_avg:0.514653
Epoch:5 Step:2824 Training_loss:0.482940 Training_loss_avg:0.511827
Epoch:5 Step:2832 Training_loss:0.487797 Training_loss_avg:0.513749
Epoch:5 Step:2840 Training_loss:0.420447 Training_loss_avg:0.512678
Epoch:5 Step

52it [00:07,  6.63it/s]


Epoch:5 Step:2976 Val_loss:0.658449
Epoch:5 Step:2984 Training_loss:0.429205 Training_loss_avg:0.476204
Epoch:5 Step:2992 Training_loss:0.411655 Training_loss_avg:0.476538
Epoch:5 Step:3000 Training_loss:0.630888 Training_loss_avg:0.476699
Epoch:5 Step:3008 Training_loss:0.482639 Training_loss_avg:0.476560
Epoch:5 Step:3016 Training_loss:0.517485 Training_loss_avg:0.477433
Epoch:5 Step:3024 Training_loss:0.634625 Training_loss_avg:0.484519
Epoch:5 Step:3032 Training_loss:0.563338 Training_loss_avg:0.486783
Epoch:5 Step:3040 Training_loss:0.780008 Training_loss_avg:0.489341
Epoch:5 Step:3048 Training_loss:0.367327 Training_loss_avg:0.487168
Epoch:5 Step:3056 Training_loss:0.426690 Training_loss_avg:0.486530
Epoch:5 Step:3064 Training_loss:0.409503 Training_loss_avg:0.485498
Epoch:5 Step:3072 Training_loss:0.506391 Training_loss_avg:0.488103
Epoch:5 Step:3080 Training_loss:0.483588 Training_loss_avg:0.491310
Epoch:5 Step:3088 Training_loss:0.663574 Training_loss_avg:0.494449
Epoch:5 Step

52it [00:07,  6.63it/s]


Epoch:5 Step:3224 Val_loss:0.629229
Epoch:5 Step:3232 Training_loss:0.387980 Training_loss_avg:0.527022
Epoch:5 Step:3240 Training_loss:0.451603 Training_loss_avg:0.527646
Epoch:5 Step:3248 Training_loss:0.512267 Training_loss_avg:0.530229
Epoch:5 Step:3256 Training_loss:0.477143 Training_loss_avg:0.526655
Epoch:5 Step:3264 Training_loss:0.527303 Training_loss_avg:0.529218
Epoch:5 Step:3272 Training_loss:0.492399 Training_loss_avg:0.533425
Epoch:5 Step:3280 Training_loss:0.561204 Training_loss_avg:0.540965
Epoch:5 Step:3288 Training_loss:0.531384 Training_loss_avg:0.542504
Epoch:5 Step:3296 Training_loss:0.387391 Training_loss_avg:0.535796
Epoch:5 Step:3304 Training_loss:0.311434 Training_loss_avg:0.528992
Epoch:5 Step:3312 Training_loss:0.548642 Training_loss_avg:0.531163
Epoch:5 Step:3320 Training_loss:0.542513 Training_loss_avg:0.534654
Epoch:5 Step:3328 Training_loss:0.872376 Training_loss_avg:0.547150
Epoch:6 Step:0 Training_loss:0.400428 Training_loss_avg:0.544035
Validating:


52it [00:07,  6.63it/s]


Epoch:6 Step:0 Val_loss:0.704358
Epoch:6 Step:8 Training_loss:0.343208 Training_loss_avg:0.538521
Epoch:6 Step:16 Training_loss:0.578955 Training_loss_avg:0.539041
Epoch:6 Step:24 Training_loss:0.811059 Training_loss_avg:0.546861
Epoch:6 Step:32 Training_loss:0.729542 Training_loss_avg:0.549529
Epoch:6 Step:40 Training_loss:0.537316 Training_loss_avg:0.550203
Epoch:6 Step:48 Training_loss:0.487009 Training_loss_avg:0.551359
Epoch:6 Step:56 Training_loss:0.282739 Training_loss_avg:0.548781
Epoch:6 Step:64 Training_loss:0.510016 Training_loss_avg:0.546364
Epoch:6 Step:72 Training_loss:0.440425 Training_loss_avg:0.545519
Epoch:6 Step:80 Training_loss:0.549826 Training_loss_avg:0.546166
Epoch:6 Step:88 Training_loss:0.669819 Training_loss_avg:0.546870
Epoch:6 Step:96 Training_loss:0.442918 Training_loss_avg:0.544462
Epoch:6 Step:104 Training_loss:0.316890 Training_loss_avg:0.535199
Epoch:6 Step:112 Training_loss:0.495343 Training_loss_avg:0.537760
Epoch:6 Step:120 Training_loss:0.444281 Tr

52it [00:07,  6.63it/s]


Epoch:6 Step:248 Val_loss:0.675073
Epoch:6 Step:256 Training_loss:0.643703 Training_loss_avg:0.484828
Epoch:6 Step:264 Training_loss:0.552136 Training_loss_avg:0.483771
Epoch:6 Step:272 Training_loss:0.731566 Training_loss_avg:0.487135
Epoch:6 Step:280 Training_loss:0.728877 Training_loss_avg:0.492664
Epoch:6 Step:288 Training_loss:0.503681 Training_loss_avg:0.491872
Epoch:6 Step:296 Training_loss:0.490169 Training_loss_avg:0.493915
Epoch:6 Step:304 Training_loss:0.472398 Training_loss_avg:0.494331
Epoch:6 Step:312 Training_loss:0.323539 Training_loss_avg:0.490557
Epoch:6 Step:320 Training_loss:0.320044 Training_loss_avg:0.487415
Epoch:6 Step:328 Training_loss:0.690914 Training_loss_avg:0.490687
Epoch:6 Step:336 Training_loss:0.670985 Training_loss_avg:0.494259
Epoch:6 Step:344 Training_loss:0.347541 Training_loss_avg:0.489985
Epoch:6 Step:352 Training_loss:0.595682 Training_loss_avg:0.491271
Epoch:6 Step:360 Training_loss:0.405951 Training_loss_avg:0.491643
Epoch:6 Step:368 Training_l

52it [00:07,  6.63it/s]


Epoch:6 Step:496 Val_loss:0.695802
Epoch:6 Step:504 Training_loss:0.405987 Training_loss_avg:0.472693
Epoch:6 Step:512 Training_loss:0.549140 Training_loss_avg:0.473769
Epoch:6 Step:520 Training_loss:0.618591 Training_loss_avg:0.477255
Epoch:6 Step:528 Training_loss:0.538877 Training_loss_avg:0.480485
Epoch:6 Step:536 Training_loss:0.414477 Training_loss_avg:0.478251
Epoch:6 Step:544 Training_loss:0.504340 Training_loss_avg:0.480258
Epoch:6 Step:552 Training_loss:0.558693 Training_loss_avg:0.481642
Epoch:6 Step:560 Training_loss:0.711803 Training_loss_avg:0.491815
Epoch:6 Step:568 Training_loss:0.306337 Training_loss_avg:0.491844
Epoch:6 Step:576 Training_loss:0.369844 Training_loss_avg:0.485642
Epoch:6 Step:584 Training_loss:0.384249 Training_loss_avg:0.486022
Epoch:6 Step:592 Training_loss:0.554470 Training_loss_avg:0.487858
Epoch:6 Step:600 Training_loss:0.317308 Training_loss_avg:0.486549
Epoch:6 Step:608 Training_loss:0.368874 Training_loss_avg:0.488965
Epoch:6 Step:616 Training_l

52it [00:07,  6.63it/s]


Epoch:6 Step:744 Val_loss:0.649960
Epoch:6 Step:752 Training_loss:0.474423 Training_loss_avg:0.523151
Epoch:6 Step:760 Training_loss:0.544147 Training_loss_avg:0.525915
Epoch:6 Step:768 Training_loss:0.314178 Training_loss_avg:0.523828
Epoch:6 Step:776 Training_loss:0.588450 Training_loss_avg:0.529877
Epoch:6 Step:784 Training_loss:0.361024 Training_loss_avg:0.526452
Epoch:6 Step:792 Training_loss:0.429442 Training_loss_avg:0.524761
Epoch:6 Step:800 Training_loss:0.461730 Training_loss_avg:0.522370
Epoch:6 Step:808 Training_loss:0.526180 Training_loss_avg:0.524667
Epoch:6 Step:816 Training_loss:0.426808 Training_loss_avg:0.522103
Epoch:6 Step:824 Training_loss:0.404287 Training_loss_avg:0.521003
Epoch:6 Step:832 Training_loss:0.411895 Training_loss_avg:0.523031
Epoch:6 Step:840 Training_loss:0.650738 Training_loss_avg:0.524187
Epoch:6 Step:848 Training_loss:0.450876 Training_loss_avg:0.520328
Epoch:6 Step:856 Training_loss:0.319755 Training_loss_avg:0.513770
Epoch:6 Step:864 Training_l

52it [00:07,  6.63it/s]


Epoch:6 Step:992 Val_loss:0.703723
Epoch:6 Step:1000 Training_loss:0.405270 Training_loss_avg:0.497461
Epoch:6 Step:1008 Training_loss:0.138568 Training_loss_avg:0.492855
Epoch:6 Step:1016 Training_loss:0.486158 Training_loss_avg:0.490574
Epoch:6 Step:1024 Training_loss:0.443984 Training_loss_avg:0.489802
Epoch:6 Step:1032 Training_loss:0.378911 Training_loss_avg:0.491233
Epoch:6 Step:1040 Training_loss:0.684375 Training_loss_avg:0.491162
Epoch:6 Step:1048 Training_loss:0.408848 Training_loss_avg:0.486394
Epoch:6 Step:1056 Training_loss:0.701298 Training_loss_avg:0.485435
Epoch:6 Step:1064 Training_loss:0.592607 Training_loss_avg:0.474236
Epoch:6 Step:1072 Training_loss:0.574467 Training_loss_avg:0.469907
Epoch:6 Step:1080 Training_loss:0.401703 Training_loss_avg:0.465974
Epoch:6 Step:1088 Training_loss:0.464156 Training_loss_avg:0.461879
Epoch:6 Step:1096 Training_loss:0.436404 Training_loss_avg:0.458801
Epoch:6 Step:1104 Training_loss:0.643878 Training_loss_avg:0.461664
Epoch:6 Step:

52it [00:07,  6.63it/s]


Epoch:6 Step:1240 Val_loss:0.677561
Epoch:6 Step:1248 Training_loss:0.422935 Training_loss_avg:0.463046
Epoch:6 Step:1256 Training_loss:0.588227 Training_loss_avg:0.468415
Epoch:6 Step:1264 Training_loss:0.421392 Training_loss_avg:0.470669
Epoch:6 Step:1272 Training_loss:0.639609 Training_loss_avg:0.470935
Epoch:6 Step:1280 Training_loss:0.459957 Training_loss_avg:0.474092
Epoch:6 Step:1288 Training_loss:0.425096 Training_loss_avg:0.473258
Epoch:6 Step:1296 Training_loss:0.501483 Training_loss_avg:0.475880
Epoch:6 Step:1304 Training_loss:0.518242 Training_loss_avg:0.477728
Epoch:6 Step:1312 Training_loss:1.005063 Training_loss_avg:0.488085
Epoch:6 Step:1320 Training_loss:0.535053 Training_loss_avg:0.491049
Epoch:6 Step:1328 Training_loss:0.538911 Training_loss_avg:0.490303
Epoch:6 Step:1336 Training_loss:0.406109 Training_loss_avg:0.486798
Epoch:6 Step:1344 Training_loss:0.535149 Training_loss_avg:0.493164
Epoch:6 Step:1352 Training_loss:0.284464 Training_loss_avg:0.488403
Epoch:6 Step

52it [00:07,  6.63it/s]


Epoch:6 Step:1488 Val_loss:0.715751
Epoch:6 Step:1496 Training_loss:0.718393 Training_loss_avg:0.517315
Epoch:6 Step:1504 Training_loss:0.375255 Training_loss_avg:0.511943
Epoch:6 Step:1512 Training_loss:0.669605 Training_loss_avg:0.521329
Epoch:6 Step:1520 Training_loss:0.425414 Training_loss_avg:0.515747
Epoch:6 Step:1528 Training_loss:0.546245 Training_loss_avg:0.517203
Epoch:6 Step:1536 Training_loss:0.310717 Training_loss_avg:0.504480
Epoch:6 Step:1544 Training_loss:1.289324 Training_loss_avg:0.521896
Epoch:6 Step:1552 Training_loss:0.665047 Training_loss_avg:0.529006
Epoch:6 Step:1560 Training_loss:0.346837 Training_loss_avg:0.526021
Epoch:6 Step:1568 Training_loss:0.792979 Training_loss_avg:0.534616
Epoch:6 Step:1576 Training_loss:0.717475 Training_loss_avg:0.535586
Epoch:6 Step:1584 Training_loss:0.649744 Training_loss_avg:0.543508
Epoch:6 Step:1592 Training_loss:0.474404 Training_loss_avg:0.549863
Epoch:6 Step:1600 Training_loss:0.416413 Training_loss_avg:0.543719
Epoch:6 Step

52it [00:07,  6.63it/s]


Epoch:6 Step:1736 Val_loss:0.751578
Epoch:6 Step:1744 Training_loss:0.390903 Training_loss_avg:0.551305
Epoch:6 Step:1752 Training_loss:0.269697 Training_loss_avg:0.551010
Epoch:6 Step:1760 Training_loss:0.448532 Training_loss_avg:0.547979
Epoch:6 Step:1768 Training_loss:0.720616 Training_loss_avg:0.553717
Epoch:6 Step:1776 Training_loss:0.167814 Training_loss_avg:0.547180
Epoch:6 Step:1784 Training_loss:0.430159 Training_loss_avg:0.546766
Epoch:6 Step:1792 Training_loss:0.453057 Training_loss_avg:0.546948
Epoch:6 Step:1800 Training_loss:0.465692 Training_loss_avg:0.547422
Epoch:6 Step:1808 Training_loss:0.465212 Training_loss_avg:0.547159
Epoch:6 Step:1816 Training_loss:0.684795 Training_loss_avg:0.551644
Epoch:6 Step:1824 Training_loss:0.322654 Training_loss_avg:0.547943
Epoch:6 Step:1832 Training_loss:0.391639 Training_loss_avg:0.541198
Epoch:6 Step:1840 Training_loss:0.556124 Training_loss_avg:0.538321
Epoch:6 Step:1848 Training_loss:0.123503 Training_loss_avg:0.530809
Epoch:6 Step

52it [00:07,  6.63it/s]


Epoch:6 Step:1984 Val_loss:0.666814
Epoch:6 Step:1992 Training_loss:0.457080 Training_loss_avg:0.530506
Epoch:6 Step:2000 Training_loss:0.254327 Training_loss_avg:0.527265
Epoch:6 Step:2008 Training_loss:0.255343 Training_loss_avg:0.515119
Epoch:6 Step:2016 Training_loss:0.702032 Training_loss_avg:0.520335
Epoch:6 Step:2024 Training_loss:0.531154 Training_loss_avg:0.515149
Epoch:6 Step:2032 Training_loss:0.677030 Training_loss_avg:0.520986
Epoch:6 Step:2040 Training_loss:0.368106 Training_loss_avg:0.512453
Epoch:6 Step:2048 Training_loss:0.748452 Training_loss_avg:0.516917
Epoch:6 Step:2056 Training_loss:0.424830 Training_loss_avg:0.512325
Epoch:6 Step:2064 Training_loss:0.534324 Training_loss_avg:0.510813
Epoch:6 Step:2072 Training_loss:0.558236 Training_loss_avg:0.513785
Epoch:6 Step:2080 Training_loss:0.401003 Training_loss_avg:0.512970
Epoch:6 Step:2088 Training_loss:0.527877 Training_loss_avg:0.514985
Epoch:6 Step:2096 Training_loss:0.547339 Training_loss_avg:0.510417
Epoch:6 Step

52it [00:07,  6.64it/s]


Epoch:6 Step:2232 Val_loss:0.676937
Epoch:6 Step:2240 Training_loss:0.342603 Training_loss_avg:0.501194
Epoch:6 Step:2248 Training_loss:0.420548 Training_loss_avg:0.507135
Epoch:6 Step:2256 Training_loss:0.664550 Training_loss_avg:0.510002
Epoch:6 Step:2264 Training_loss:0.511460 Training_loss_avg:0.510360
Epoch:6 Step:2272 Training_loss:0.373765 Training_loss_avg:0.506859
Epoch:6 Step:2280 Training_loss:0.434074 Training_loss_avg:0.507897
Epoch:6 Step:2288 Training_loss:0.377248 Training_loss_avg:0.503784
Epoch:6 Step:2296 Training_loss:0.447540 Training_loss_avg:0.490892
Epoch:6 Step:2304 Training_loss:0.535859 Training_loss_avg:0.492781
Epoch:6 Step:2312 Training_loss:0.277223 Training_loss_avg:0.485529
Epoch:6 Step:2320 Training_loss:0.613849 Training_loss_avg:0.483351
Epoch:6 Step:2328 Training_loss:0.380076 Training_loss_avg:0.482538
Epoch:6 Step:2336 Training_loss:0.464214 Training_loss_avg:0.478306
Epoch:6 Step:2344 Training_loss:0.438626 Training_loss_avg:0.475156
Epoch:6 Step

52it [00:07,  6.63it/s]


Epoch:6 Step:2480 Val_loss:0.723641
Epoch:6 Step:2488 Training_loss:0.951127 Training_loss_avg:0.470117
Epoch:6 Step:2496 Training_loss:0.436271 Training_loss_avg:0.467896
Epoch:6 Step:2504 Training_loss:0.389564 Training_loss_avg:0.466840
Epoch:6 Step:2512 Training_loss:0.447087 Training_loss_avg:0.471311
Epoch:6 Step:2520 Training_loss:0.824831 Training_loss_avg:0.475929
Epoch:6 Step:2528 Training_loss:0.558981 Training_loss_avg:0.479362
Epoch:6 Step:2536 Training_loss:0.558886 Training_loss_avg:0.481476
Epoch:6 Step:2544 Training_loss:0.489870 Training_loss_avg:0.479358
Epoch:6 Step:2552 Training_loss:0.391180 Training_loss_avg:0.476034
Epoch:6 Step:2560 Training_loss:0.595015 Training_loss_avg:0.477666
Epoch:6 Step:2568 Training_loss:0.375686 Training_loss_avg:0.478633
Epoch:6 Step:2576 Training_loss:0.661985 Training_loss_avg:0.482660
Epoch:6 Step:2584 Training_loss:0.769072 Training_loss_avg:0.489435
Epoch:6 Step:2592 Training_loss:0.460909 Training_loss_avg:0.489154
Epoch:6 Step

52it [00:07,  6.63it/s]


Epoch:6 Step:2728 Val_loss:0.679871
Epoch:6 Step:2736 Training_loss:0.706010 Training_loss_avg:0.540689
Epoch:6 Step:2744 Training_loss:0.611947 Training_loss_avg:0.544156
Epoch:6 Step:2752 Training_loss:0.574600 Training_loss_avg:0.547524
Epoch:6 Step:2760 Training_loss:0.592852 Training_loss_avg:0.548578
Epoch:6 Step:2768 Training_loss:0.749849 Training_loss_avg:0.553939
Epoch:6 Step:2776 Training_loss:0.302391 Training_loss_avg:0.553961
Epoch:6 Step:2784 Training_loss:0.504950 Training_loss_avg:0.561213
Epoch:6 Step:2792 Training_loss:0.432515 Training_loss_avg:0.556726
Epoch:6 Step:2800 Training_loss:0.536450 Training_loss_avg:0.562395
Epoch:6 Step:2808 Training_loss:0.843040 Training_loss_avg:0.564198
Epoch:6 Step:2816 Training_loss:0.645109 Training_loss_avg:0.569412
Epoch:6 Step:2824 Training_loss:0.607363 Training_loss_avg:0.561323
Epoch:6 Step:2832 Training_loss:0.407897 Training_loss_avg:0.558517
Epoch:6 Step:2840 Training_loss:0.791324 Training_loss_avg:0.562669
Epoch:6 Step

52it [00:07,  6.63it/s]


Epoch:6 Step:2976 Val_loss:0.647295
Epoch:6 Step:2984 Training_loss:0.482646 Training_loss_avg:0.540078
Epoch:6 Step:2992 Training_loss:0.490807 Training_loss_avg:0.540676
Epoch:6 Step:3000 Training_loss:0.522189 Training_loss_avg:0.532590
Epoch:6 Step:3008 Training_loss:0.399459 Training_loss_avg:0.528098
Epoch:6 Step:3016 Training_loss:0.655137 Training_loss_avg:0.528579
Epoch:6 Step:3024 Training_loss:0.367546 Training_loss_avg:0.526927
Epoch:6 Step:3032 Training_loss:0.345481 Training_loss_avg:0.521558
Epoch:6 Step:3040 Training_loss:0.473848 Training_loss_avg:0.521229
Epoch:6 Step:3048 Training_loss:0.187742 Training_loss_avg:0.514641
Epoch:6 Step:3056 Training_loss:0.319992 Training_loss_avg:0.506770
Epoch:6 Step:3064 Training_loss:0.508253 Training_loss_avg:0.507533
Epoch:6 Step:3072 Training_loss:0.323836 Training_loss_avg:0.505600
Epoch:6 Step:3080 Training_loss:0.429154 Training_loss_avg:0.500014
Epoch:6 Step:3088 Training_loss:0.545779 Training_loss_avg:0.499466
Epoch:6 Step

52it [00:07,  6.63it/s]


Epoch:6 Step:3224 Val_loss:0.715176
Epoch:6 Step:3232 Training_loss:0.486823 Training_loss_avg:0.453449
Epoch:6 Step:3240 Training_loss:0.371429 Training_loss_avg:0.445051
Epoch:6 Step:3248 Training_loss:0.522031 Training_loss_avg:0.446192
Epoch:6 Step:3256 Training_loss:0.330574 Training_loss_avg:0.436248
Epoch:6 Step:3264 Training_loss:0.501485 Training_loss_avg:0.436814
Epoch:6 Step:3272 Training_loss:0.584081 Training_loss_avg:0.437761
Epoch:6 Step:3280 Training_loss:0.878313 Training_loss_avg:0.450018
Epoch:6 Step:3288 Training_loss:0.375813 Training_loss_avg:0.445076
Epoch:6 Step:3296 Training_loss:0.594802 Training_loss_avg:0.446521
Epoch:6 Step:3304 Training_loss:0.423591 Training_loss_avg:0.449329
Epoch:6 Step:3312 Training_loss:0.753370 Training_loss_avg:0.456295
Epoch:6 Step:3320 Training_loss:0.456170 Training_loss_avg:0.459656
Epoch:6 Step:3328 Training_loss:0.501707 Training_loss_avg:0.463574
Epoch:7 Step:0 Training_loss:0.338812 Training_loss_avg:0.460506
Validating:


52it [00:07,  6.63it/s]


Epoch:7 Step:0 Val_loss:0.701245
Epoch:7 Step:8 Training_loss:0.200293 Training_loss_avg:0.455555
Epoch:7 Step:16 Training_loss:0.337509 Training_loss_avg:0.451759
Epoch:7 Step:24 Training_loss:0.490489 Training_loss_avg:0.452755
Epoch:7 Step:32 Training_loss:0.607034 Training_loss_avg:0.455189
Epoch:7 Step:40 Training_loss:0.355582 Training_loss_avg:0.453712
Epoch:7 Step:48 Training_loss:0.466479 Training_loss_avg:0.453388
Epoch:7 Step:56 Training_loss:0.408245 Training_loss_avg:0.451737
Epoch:7 Step:64 Training_loss:0.441072 Training_loss_avg:0.450115
Epoch:7 Step:72 Training_loss:0.365242 Training_loss_avg:0.449431
Epoch:7 Step:80 Training_loss:0.234399 Training_loss_avg:0.441016
Epoch:7 Step:88 Training_loss:0.200551 Training_loss_avg:0.437676
Epoch:7 Step:96 Training_loss:0.318765 Training_loss_avg:0.437142
Epoch:7 Step:104 Training_loss:0.347337 Training_loss_avg:0.434611
Epoch:7 Step:112 Training_loss:0.512866 Training_loss_avg:0.441114
Epoch:7 Step:120 Training_loss:0.124346 Tr

52it [00:07,  6.63it/s]


Epoch:7 Step:248 Val_loss:0.819573
Epoch:7 Step:256 Training_loss:0.767969 Training_loss_avg:0.444040
Epoch:7 Step:264 Training_loss:0.488794 Training_loss_avg:0.446678
Epoch:7 Step:272 Training_loss:0.209977 Training_loss_avg:0.441157
Epoch:7 Step:280 Training_loss:0.657183 Training_loss_avg:0.440822
Epoch:7 Step:288 Training_loss:0.312831 Training_loss_avg:0.433862
Epoch:7 Step:296 Training_loss:0.349155 Training_loss_avg:0.431108
Epoch:7 Step:304 Training_loss:0.277127 Training_loss_avg:0.429222
Epoch:7 Step:312 Training_loss:0.299219 Training_loss_avg:0.424766
Epoch:7 Step:320 Training_loss:0.458770 Training_loss_avg:0.427330
Epoch:7 Step:328 Training_loss:0.397833 Training_loss_avg:0.425257
Epoch:7 Step:336 Training_loss:0.389686 Training_loss_avg:0.421369
Epoch:7 Step:344 Training_loss:0.444580 Training_loss_avg:0.412694
Epoch:7 Step:352 Training_loss:0.344433 Training_loss_avg:0.412067
Epoch:7 Step:360 Training_loss:0.867322 Training_loss_avg:0.417517
Epoch:7 Step:368 Training_l

52it [00:07,  6.63it/s]


Epoch:7 Step:496 Val_loss:0.716023
Epoch:7 Step:504 Training_loss:0.450462 Training_loss_avg:0.429140
Epoch:7 Step:512 Training_loss:0.420145 Training_loss_avg:0.427285
Epoch:7 Step:520 Training_loss:0.499588 Training_loss_avg:0.434790
Epoch:7 Step:528 Training_loss:0.341385 Training_loss_avg:0.434472
Epoch:7 Step:536 Training_loss:0.593525 Training_loss_avg:0.433799
Epoch:7 Step:544 Training_loss:0.236067 Training_loss_avg:0.430699
Epoch:7 Step:552 Training_loss:0.678958 Training_loss_avg:0.439526
Epoch:7 Step:560 Training_loss:0.338607 Training_loss_avg:0.439709
Epoch:7 Step:568 Training_loss:0.249668 Training_loss_avg:0.436041
Epoch:7 Step:576 Training_loss:0.200267 Training_loss_avg:0.433036
Epoch:7 Step:584 Training_loss:0.555044 Training_loss_avg:0.434735
Epoch:7 Step:592 Training_loss:0.250714 Training_loss_avg:0.425187
Epoch:7 Step:600 Training_loss:0.273601 Training_loss_avg:0.419650
Epoch:7 Step:608 Training_loss:0.459486 Training_loss_avg:0.424722
Epoch:7 Step:616 Training_l

52it [00:07,  6.64it/s]


Epoch:7 Step:744 Val_loss:0.722935
Epoch:7 Step:752 Training_loss:0.411801 Training_loss_avg:0.438302
Epoch:7 Step:760 Training_loss:0.367357 Training_loss_avg:0.428303
Epoch:7 Step:768 Training_loss:0.350997 Training_loss_avg:0.426783
Epoch:7 Step:776 Training_loss:0.595925 Training_loss_avg:0.429648
Epoch:7 Step:784 Training_loss:0.269890 Training_loss_avg:0.426248
Epoch:7 Step:792 Training_loss:0.568876 Training_loss_avg:0.430896
Epoch:7 Step:800 Training_loss:0.643754 Training_loss_avg:0.434732
Epoch:7 Step:808 Training_loss:0.444854 Training_loss_avg:0.436557
Epoch:7 Step:816 Training_loss:0.301526 Training_loss_avg:0.429668
Epoch:7 Step:824 Training_loss:0.424340 Training_loss_avg:0.427747
Epoch:7 Step:832 Training_loss:0.182828 Training_loss_avg:0.422069
Epoch:7 Step:840 Training_loss:0.383486 Training_loss_avg:0.421388
Epoch:7 Step:848 Training_loss:0.317126 Training_loss_avg:0.421343
Epoch:7 Step:856 Training_loss:0.400517 Training_loss_avg:0.422134
Epoch:7 Step:864 Training_l

52it [00:07,  6.63it/s]


Epoch:7 Step:992 Val_loss:0.805036
Epoch:7 Step:1000 Training_loss:0.093643 Training_loss_avg:0.404404
Epoch:7 Step:1008 Training_loss:0.583305 Training_loss_avg:0.406880
Epoch:7 Step:1016 Training_loss:0.280614 Training_loss_avg:0.399520
Epoch:7 Step:1024 Training_loss:0.240444 Training_loss_avg:0.396300
Epoch:7 Step:1032 Training_loss:0.732577 Training_loss_avg:0.401191
Epoch:7 Step:1040 Training_loss:0.870078 Training_loss_avg:0.407505
Epoch:7 Step:1048 Training_loss:0.374406 Training_loss_avg:0.404470
Epoch:7 Step:1056 Training_loss:0.360062 Training_loss_avg:0.405270
Epoch:7 Step:1064 Training_loss:0.492486 Training_loss_avg:0.404652
Epoch:7 Step:1072 Training_loss:0.888919 Training_loss_avg:0.415610
Epoch:7 Step:1080 Training_loss:0.252452 Training_loss_avg:0.417533
Epoch:7 Step:1088 Training_loss:0.149347 Training_loss_avg:0.408652
Epoch:7 Step:1096 Training_loss:0.302313 Training_loss_avg:0.408604
Epoch:7 Step:1104 Training_loss:0.342215 Training_loss_avg:0.410874
Epoch:7 Step:

52it [00:07,  6.63it/s]


Epoch:7 Step:1240 Val_loss:0.701626
Epoch:7 Step:1248 Training_loss:0.523823 Training_loss_avg:0.428573
Epoch:7 Step:1256 Training_loss:0.268965 Training_loss_avg:0.425942
Epoch:7 Step:1264 Training_loss:0.423051 Training_loss_avg:0.426044
Epoch:7 Step:1272 Training_loss:0.277811 Training_loss_avg:0.419856
Epoch:7 Step:1280 Training_loss:0.494727 Training_loss_avg:0.421066
Epoch:7 Step:1288 Training_loss:0.636834 Training_loss_avg:0.425967
Epoch:7 Step:1296 Training_loss:0.712473 Training_loss_avg:0.431545
Epoch:7 Step:1304 Training_loss:0.137021 Training_loss_avg:0.424464
Epoch:7 Step:1312 Training_loss:0.413173 Training_loss_avg:0.430580
Epoch:7 Step:1320 Training_loss:0.553317 Training_loss_avg:0.438624
Epoch:7 Step:1328 Training_loss:0.204307 Training_loss_avg:0.437716
Epoch:7 Step:1336 Training_loss:0.234055 Training_loss_avg:0.431506
Epoch:7 Step:1344 Training_loss:0.294327 Training_loss_avg:0.429795
Epoch:7 Step:1352 Training_loss:0.226317 Training_loss_avg:0.422037
Epoch:7 Step

52it [00:07,  6.63it/s]


Epoch:7 Step:1488 Val_loss:0.919419
Epoch:7 Step:1496 Training_loss:1.387515 Training_loss_avg:0.470561
Epoch:7 Step:1504 Training_loss:0.302981 Training_loss_avg:0.469776
Epoch:7 Step:1512 Training_loss:0.504531 Training_loss_avg:0.473864
Epoch:7 Step:1520 Training_loss:0.505886 Training_loss_avg:0.475711
Epoch:7 Step:1528 Training_loss:0.192121 Training_loss_avg:0.475229
Epoch:7 Step:1536 Training_loss:0.609866 Training_loss_avg:0.480998
Epoch:7 Step:1544 Training_loss:0.654292 Training_loss_avg:0.479679
Epoch:7 Step:1552 Training_loss:0.392520 Training_loss_avg:0.472479
Epoch:7 Step:1560 Training_loss:0.643348 Training_loss_avg:0.478814
Epoch:7 Step:1568 Training_loss:0.788755 Training_loss_avg:0.486690
Epoch:7 Step:1576 Training_loss:0.334666 Training_loss_avg:0.480902
Epoch:7 Step:1584 Training_loss:0.468167 Training_loss_avg:0.483959
Epoch:7 Step:1592 Training_loss:0.709608 Training_loss_avg:0.490660
Epoch:7 Step:1600 Training_loss:0.253753 Training_loss_avg:0.478696
Epoch:7 Step

52it [00:07,  6.64it/s]


Epoch:7 Step:1736 Val_loss:0.676483
Epoch:7 Step:1744 Training_loss:0.222997 Training_loss_avg:0.497423
Epoch:7 Step:1752 Training_loss:0.458673 Training_loss_avg:0.502070
Epoch:7 Step:1760 Training_loss:0.450731 Training_loss_avg:0.505002
Epoch:7 Step:1768 Training_loss:0.316300 Training_loss_avg:0.500155
Epoch:7 Step:1776 Training_loss:0.790400 Training_loss_avg:0.506371
Epoch:7 Step:1784 Training_loss:0.266634 Training_loss_avg:0.505448
Epoch:7 Step:1792 Training_loss:0.467347 Training_loss_avg:0.505773
Epoch:7 Step:1800 Training_loss:0.694257 Training_loss_avg:0.505819
Epoch:7 Step:1808 Training_loss:0.255227 Training_loss_avg:0.499565
Epoch:7 Step:1816 Training_loss:0.544795 Training_loss_avg:0.490730
Epoch:7 Step:1824 Training_loss:0.453320 Training_loss_avg:0.489877
Epoch:7 Step:1832 Training_loss:0.226765 Training_loss_avg:0.489424
Epoch:7 Step:1840 Training_loss:0.402827 Training_loss_avg:0.485066
Epoch:7 Step:1848 Training_loss:0.256463 Training_loss_avg:0.486117
Epoch:7 Step

52it [00:07,  6.63it/s]


Epoch:7 Step:1984 Val_loss:0.650544
Epoch:7 Step:1992 Training_loss:0.835989 Training_loss_avg:0.490372
Epoch:7 Step:2000 Training_loss:0.587545 Training_loss_avg:0.497048
Epoch:7 Step:2008 Training_loss:0.373292 Training_loss_avg:0.491873
Epoch:7 Step:2016 Training_loss:0.588646 Training_loss_avg:0.494642
Epoch:7 Step:2024 Training_loss:0.379908 Training_loss_avg:0.490459
Epoch:7 Step:2032 Training_loss:0.598455 Training_loss_avg:0.495901
Epoch:7 Step:2040 Training_loss:0.349008 Training_loss_avg:0.490061
Epoch:7 Step:2048 Training_loss:0.374451 Training_loss_avg:0.484857
Epoch:7 Step:2056 Training_loss:0.412051 Training_loss_avg:0.488045
Epoch:7 Step:2064 Training_loss:0.445155 Training_loss_avg:0.488591
Epoch:7 Step:2072 Training_loss:0.598453 Training_loss_avg:0.487617
Epoch:7 Step:2080 Training_loss:0.197831 Training_loss_avg:0.485985
Epoch:7 Step:2088 Training_loss:0.539322 Training_loss_avg:0.488506
Epoch:7 Step:2096 Training_loss:0.157122 Training_loss_avg:0.474535
Epoch:7 Step

52it [00:07,  6.64it/s]


Epoch:7 Step:2232 Val_loss:0.702498
Epoch:7 Step:2240 Training_loss:0.468224 Training_loss_avg:0.464622
Epoch:7 Step:2248 Training_loss:0.518077 Training_loss_avg:0.469855
Epoch:7 Step:2256 Training_loss:0.135581 Training_loss_avg:0.462994
Epoch:7 Step:2264 Training_loss:0.383599 Training_loss_avg:0.451330
Epoch:7 Step:2272 Training_loss:0.363874 Training_loss_avg:0.451244
Epoch:7 Step:2280 Training_loss:0.554313 Training_loss_avg:0.453113
Epoch:7 Step:2288 Training_loss:0.489964 Training_loss_avg:0.455758
Epoch:7 Step:2296 Training_loss:0.270276 Training_loss_avg:0.451279
Epoch:7 Step:2304 Training_loss:0.389183 Training_loss_avg:0.444209
Epoch:7 Step:2312 Training_loss:0.515154 Training_loss_avg:0.444679
Epoch:7 Step:2320 Training_loss:0.469853 Training_loss_avg:0.438606
Epoch:7 Step:2328 Training_loss:0.247540 Training_loss_avg:0.429583
Epoch:7 Step:2336 Training_loss:0.359449 Training_loss_avg:0.429270
Epoch:7 Step:2344 Training_loss:0.261656 Training_loss_avg:0.425185
Epoch:7 Step

52it [00:07,  6.64it/s]


Epoch:7 Step:2480 Val_loss:0.729428
Epoch:7 Step:2488 Training_loss:0.717808 Training_loss_avg:0.424289
Epoch:7 Step:2496 Training_loss:0.918890 Training_loss_avg:0.439525
Epoch:7 Step:2504 Training_loss:0.229499 Training_loss_avg:0.436921
Epoch:7 Step:2512 Training_loss:0.364451 Training_loss_avg:0.437394
Epoch:7 Step:2520 Training_loss:0.375827 Training_loss_avg:0.438476
Epoch:7 Step:2528 Training_loss:0.446138 Training_loss_avg:0.438537
Epoch:7 Step:2536 Training_loss:0.404497 Training_loss_avg:0.436223
Epoch:7 Step:2544 Training_loss:0.379575 Training_loss_avg:0.441006
Epoch:7 Step:2552 Training_loss:0.438875 Training_loss_avg:0.439260
Epoch:7 Step:2560 Training_loss:0.390606 Training_loss_avg:0.434899
Epoch:7 Step:2568 Training_loss:0.664289 Training_loss_avg:0.444575
Epoch:7 Step:2576 Training_loss:0.632958 Training_loss_avg:0.449858
Epoch:7 Step:2584 Training_loss:0.409448 Training_loss_avg:0.443134
Epoch:7 Step:2592 Training_loss:0.436704 Training_loss_avg:0.444926
Epoch:7 Step

52it [00:07,  6.63it/s]


Epoch:7 Step:2728 Val_loss:0.776447
Epoch:7 Step:2736 Training_loss:0.364161 Training_loss_avg:0.464941
Epoch:7 Step:2744 Training_loss:0.361979 Training_loss_avg:0.466948
Epoch:7 Step:2752 Training_loss:0.299676 Training_loss_avg:0.464701
Epoch:7 Step:2760 Training_loss:0.399619 Training_loss_avg:0.461528
Epoch:7 Step:2768 Training_loss:0.488980 Training_loss_avg:0.466767
Epoch:7 Step:2776 Training_loss:0.254116 Training_loss_avg:0.462288
Epoch:7 Step:2784 Training_loss:0.172923 Training_loss_avg:0.461581
Epoch:7 Step:2792 Training_loss:0.352336 Training_loss_avg:0.457388
Epoch:7 Step:2800 Training_loss:0.659225 Training_loss_avg:0.460967
Epoch:7 Step:2808 Training_loss:0.320858 Training_loss_avg:0.458587
Epoch:7 Step:2816 Training_loss:0.242259 Training_loss_avg:0.458723
Epoch:7 Step:2824 Training_loss:0.220979 Training_loss_avg:0.452612
Epoch:7 Step:2832 Training_loss:0.218755 Training_loss_avg:0.451298
Epoch:7 Step:2840 Training_loss:0.476945 Training_loss_avg:0.446335
Epoch:7 Step

52it [00:07,  6.63it/s]


Epoch:7 Step:2976 Val_loss:0.803219
Epoch:7 Step:2984 Training_loss:0.302363 Training_loss_avg:0.431141
Epoch:7 Step:2992 Training_loss:0.553493 Training_loss_avg:0.433477
Epoch:7 Step:3000 Training_loss:0.207355 Training_loss_avg:0.433479
Epoch:7 Step:3008 Training_loss:0.388164 Training_loss_avg:0.435976
Epoch:7 Step:3016 Training_loss:0.658719 Training_loss_avg:0.437522
Epoch:7 Step:3024 Training_loss:0.494419 Training_loss_avg:0.440553
Epoch:7 Step:3032 Training_loss:0.251382 Training_loss_avg:0.434226
Epoch:7 Step:3040 Training_loss:0.269368 Training_loss_avg:0.420396
Epoch:7 Step:3048 Training_loss:0.288093 Training_loss_avg:0.407139
Epoch:7 Step:3056 Training_loss:0.505981 Training_loss_avg:0.410417
Epoch:7 Step:3064 Training_loss:0.458730 Training_loss_avg:0.410078
Epoch:7 Step:3072 Training_loss:0.458097 Training_loss_avg:0.412366
Epoch:7 Step:3080 Training_loss:0.314341 Training_loss_avg:0.404530
Epoch:7 Step:3088 Training_loss:0.203629 Training_loss_avg:0.402168
Epoch:7 Step

52it [00:07,  6.63it/s]


Epoch:7 Step:3224 Val_loss:0.899734
Epoch:7 Step:3232 Training_loss:0.171169 Training_loss_avg:0.433405
Epoch:7 Step:3240 Training_loss:0.384776 Training_loss_avg:0.431561
Epoch:7 Step:3248 Training_loss:0.353584 Training_loss_avg:0.427035
Epoch:7 Step:3256 Training_loss:0.274067 Training_loss_avg:0.417697
Epoch:7 Step:3264 Training_loss:0.401197 Training_loss_avg:0.415090
Epoch:7 Step:3272 Training_loss:0.515590 Training_loss_avg:0.418727
Epoch:7 Step:3280 Training_loss:0.781432 Training_loss_avg:0.430210
Epoch:7 Step:3288 Training_loss:0.395602 Training_loss_avg:0.428005
Epoch:7 Step:3296 Training_loss:0.548694 Training_loss_avg:0.424786
Epoch:7 Step:3304 Training_loss:0.218592 Training_loss_avg:0.424228
Epoch:7 Step:3312 Training_loss:0.281497 Training_loss_avg:0.419947
Epoch:7 Step:3320 Training_loss:0.307585 Training_loss_avg:0.413023
Epoch:7 Step:3328 Training_loss:0.213126 Training_loss_avg:0.412262
Epoch:8 Step:0 Training_loss:0.345272 Training_loss_avg:0.409846
Validating:


52it [00:07,  6.63it/s]


Epoch:8 Step:0 Val_loss:0.732115
Epoch:8 Step:8 Training_loss:0.307406 Training_loss_avg:0.399437
Epoch:8 Step:16 Training_loss:0.300364 Training_loss_avg:0.395573
Epoch:8 Step:24 Training_loss:0.408198 Training_loss_avg:0.397697
Epoch:8 Step:32 Training_loss:0.502467 Training_loss_avg:0.399454
Epoch:8 Step:40 Training_loss:0.347897 Training_loss_avg:0.399318
Epoch:8 Step:48 Training_loss:0.226341 Training_loss_avg:0.397798
Epoch:8 Step:56 Training_loss:0.225950 Training_loss_avg:0.391247
Epoch:8 Step:64 Training_loss:0.427971 Training_loss_avg:0.395659
Epoch:8 Step:72 Training_loss:0.442137 Training_loss_avg:0.396739
Epoch:8 Step:80 Training_loss:0.857596 Training_loss_avg:0.400716
Epoch:8 Step:88 Training_loss:0.542341 Training_loss_avg:0.401675
Epoch:8 Step:96 Training_loss:0.308228 Training_loss_avg:0.402812
Epoch:8 Step:104 Training_loss:0.228311 Training_loss_avg:0.401990
Epoch:8 Step:112 Training_loss:0.172627 Training_loss_avg:0.399681
Epoch:8 Step:120 Training_loss:0.167607 Tr

52it [00:07,  6.64it/s]


Epoch:8 Step:248 Val_loss:0.906997
Epoch:8 Step:256 Training_loss:0.134897 Training_loss_avg:0.365167
Epoch:8 Step:264 Training_loss:0.330732 Training_loss_avg:0.365231
Epoch:8 Step:272 Training_loss:0.637762 Training_loss_avg:0.370802
Epoch:8 Step:280 Training_loss:0.398788 Training_loss_avg:0.358176
Epoch:8 Step:288 Training_loss:0.769174 Training_loss_avg:0.361793
Epoch:8 Step:296 Training_loss:0.227183 Training_loss_avg:0.362913
Epoch:8 Step:304 Training_loss:0.662213 Training_loss_avg:0.368462
Epoch:8 Step:312 Training_loss:0.314573 Training_loss_avg:0.367681
Epoch:8 Step:320 Training_loss:0.282775 Training_loss_avg:0.367856
Epoch:8 Step:328 Training_loss:0.252797 Training_loss_avg:0.364888
Epoch:8 Step:336 Training_loss:0.545559 Training_loss_avg:0.365487
Epoch:8 Step:344 Training_loss:0.729675 Training_loss_avg:0.364452
Epoch:8 Step:352 Training_loss:0.394568 Training_loss_avg:0.364431
Epoch:8 Step:360 Training_loss:0.421517 Training_loss_avg:0.361888
Epoch:8 Step:368 Training_l

52it [00:07,  6.63it/s]


Epoch:8 Step:496 Val_loss:0.755022
Epoch:8 Step:504 Training_loss:0.330236 Training_loss_avg:0.397414
Epoch:8 Step:512 Training_loss:0.555790 Training_loss_avg:0.405077
Epoch:8 Step:520 Training_loss:0.613882 Training_loss_avg:0.414003
Epoch:8 Step:528 Training_loss:0.341537 Training_loss_avg:0.417644
Epoch:8 Step:536 Training_loss:0.469510 Training_loss_avg:0.417814
Epoch:8 Step:544 Training_loss:0.799776 Training_loss_avg:0.429670
Epoch:8 Step:552 Training_loss:0.287715 Training_loss_avg:0.429498
Epoch:8 Step:560 Training_loss:0.342314 Training_loss_avg:0.429410
Epoch:8 Step:568 Training_loss:0.499876 Training_loss_avg:0.436115
Epoch:8 Step:576 Training_loss:0.240879 Training_loss_avg:0.434333
Epoch:8 Step:584 Training_loss:0.167758 Training_loss_avg:0.431706
Epoch:8 Step:592 Training_loss:0.443660 Training_loss_avg:0.433500
Epoch:8 Step:600 Training_loss:0.436700 Training_loss_avg:0.436585
Epoch:8 Step:608 Training_loss:0.800653 Training_loss_avg:0.440514
Epoch:8 Step:616 Training_l

52it [00:07,  6.63it/s]


Epoch:8 Step:744 Val_loss:0.713002
Epoch:8 Step:752 Training_loss:0.764029 Training_loss_avg:0.460248
Epoch:8 Step:760 Training_loss:0.266554 Training_loss_avg:0.457149
Epoch:8 Step:768 Training_loss:0.298035 Training_loss_avg:0.450228
Epoch:8 Step:776 Training_loss:0.552369 Training_loss_avg:0.457239
Epoch:8 Step:784 Training_loss:0.149184 Training_loss_avg:0.455073
Epoch:8 Step:792 Training_loss:0.378834 Training_loss_avg:0.450694
Epoch:8 Step:800 Training_loss:0.525532 Training_loss_avg:0.447139
Epoch:8 Step:808 Training_loss:0.529774 Training_loss_avg:0.447314
Epoch:8 Step:816 Training_loss:0.677436 Training_loss_avg:0.450928
Epoch:8 Step:824 Training_loss:0.207041 Training_loss_avg:0.448330
Epoch:8 Step:832 Training_loss:0.352878 Training_loss_avg:0.447074
Epoch:8 Step:840 Training_loss:0.513278 Training_loss_avg:0.448278
Epoch:8 Step:848 Training_loss:0.393852 Training_loss_avg:0.450605
Epoch:8 Step:856 Training_loss:0.406044 Training_loss_avg:0.449791
Epoch:8 Step:864 Training_l

52it [00:07,  6.63it/s]


Epoch:8 Step:992 Val_loss:0.728338
Epoch:8 Step:1000 Training_loss:0.501155 Training_loss_avg:0.431905
Epoch:8 Step:1008 Training_loss:0.299408 Training_loss_avg:0.421880
Epoch:8 Step:1016 Training_loss:0.387706 Training_loss_avg:0.423938
Epoch:8 Step:1024 Training_loss:0.218105 Training_loss_avg:0.421350
Epoch:8 Step:1032 Training_loss:0.224360 Training_loss_avg:0.419133
Epoch:8 Step:1040 Training_loss:0.373919 Training_loss_avg:0.416208
Epoch:8 Step:1048 Training_loss:0.374186 Training_loss_avg:0.412859
Epoch:8 Step:1056 Training_loss:0.423968 Training_loss_avg:0.416101
Epoch:8 Step:1064 Training_loss:0.327668 Training_loss_avg:0.409355
Epoch:8 Step:1072 Training_loss:0.345789 Training_loss_avg:0.410890
Epoch:8 Step:1080 Training_loss:0.195398 Training_loss_avg:0.406245
Epoch:8 Step:1088 Training_loss:0.485198 Training_loss_avg:0.404712
Epoch:8 Step:1096 Training_loss:0.267814 Training_loss_avg:0.406152
Epoch:8 Step:1104 Training_loss:0.262711 Training_loss_avg:0.398073
Epoch:8 Step:

52it [00:07,  6.64it/s]


Epoch:8 Step:1240 Val_loss:0.761565
Epoch:8 Step:1248 Training_loss:0.416541 Training_loss_avg:0.388266
Epoch:8 Step:1256 Training_loss:0.686147 Training_loss_avg:0.393868
Epoch:8 Step:1264 Training_loss:0.321035 Training_loss_avg:0.392041
Epoch:8 Step:1272 Training_loss:0.479737 Training_loss_avg:0.395130
Epoch:8 Step:1280 Training_loss:0.675216 Training_loss_avg:0.390724
Epoch:8 Step:1288 Training_loss:0.280747 Training_loss_avg:0.388385
Epoch:8 Step:1296 Training_loss:0.897650 Training_loss_avg:0.401079
Epoch:8 Step:1304 Training_loss:0.854546 Training_loss_avg:0.409053
Epoch:8 Step:1312 Training_loss:0.497215 Training_loss_avg:0.415771
Epoch:8 Step:1320 Training_loss:0.275837 Training_loss_avg:0.415972
Epoch:8 Step:1328 Training_loss:0.341117 Training_loss_avg:0.416808
Epoch:8 Step:1336 Training_loss:0.392453 Training_loss_avg:0.416613
Epoch:8 Step:1344 Training_loss:0.207765 Training_loss_avg:0.415369
Epoch:8 Step:1352 Training_loss:0.362288 Training_loss_avg:0.418974
Epoch:8 Step

52it [00:07,  6.63it/s]


Epoch:8 Step:1488 Val_loss:0.880175
Epoch:8 Step:1496 Training_loss:0.379024 Training_loss_avg:0.426186
Epoch:8 Step:1504 Training_loss:0.379686 Training_loss_avg:0.428525
Epoch:8 Step:1512 Training_loss:0.227883 Training_loss_avg:0.429519
Epoch:8 Step:1520 Training_loss:0.769610 Training_loss_avg:0.437711
Epoch:8 Step:1528 Training_loss:0.636451 Training_loss_avg:0.440091
Epoch:8 Step:1536 Training_loss:0.422807 Training_loss_avg:0.430286
Epoch:8 Step:1544 Training_loss:0.044217 Training_loss_avg:0.424179
Epoch:8 Step:1552 Training_loss:0.361512 Training_loss_avg:0.425424
Epoch:8 Step:1560 Training_loss:0.310882 Training_loss_avg:0.427434
Epoch:8 Step:1568 Training_loss:0.678267 Training_loss_avg:0.429982
Epoch:8 Step:1576 Training_loss:0.490031 Training_loss_avg:0.433293
Epoch:8 Step:1584 Training_loss:0.453335 Training_loss_avg:0.433370
Epoch:8 Step:1592 Training_loss:0.361867 Training_loss_avg:0.439146
Epoch:8 Step:1600 Training_loss:0.226212 Training_loss_avg:0.431476
Epoch:8 Step

52it [00:07,  6.63it/s]


Epoch:8 Step:1736 Val_loss:0.800554
Epoch:8 Step:1744 Training_loss:0.471016 Training_loss_avg:0.398838
Epoch:8 Step:1752 Training_loss:0.328146 Training_loss_avg:0.398156
Epoch:8 Step:1760 Training_loss:0.334096 Training_loss_avg:0.394743
Epoch:8 Step:1768 Training_loss:0.469861 Training_loss_avg:0.398155
Epoch:8 Step:1776 Training_loss:0.320593 Training_loss_avg:0.398785
Epoch:8 Step:1784 Training_loss:0.451541 Training_loss_avg:0.394306
Epoch:8 Step:1792 Training_loss:0.344048 Training_loss_avg:0.388891
Epoch:8 Step:1800 Training_loss:0.306544 Training_loss_avg:0.388472
Epoch:8 Step:1808 Training_loss:0.223133 Training_loss_avg:0.380000
Epoch:8 Step:1816 Training_loss:0.215910 Training_loss_avg:0.376614
Epoch:8 Step:1824 Training_loss:0.223222 Training_loss_avg:0.372912
Epoch:8 Step:1832 Training_loss:0.594490 Training_loss_avg:0.375958
Epoch:8 Step:1840 Training_loss:0.152535 Training_loss_avg:0.371852
Epoch:8 Step:1848 Training_loss:0.151052 Training_loss_avg:0.366702
Epoch:8 Step

52it [00:07,  6.63it/s]


Epoch:8 Step:1984 Val_loss:0.924010
Epoch:8 Step:1992 Training_loss:0.341546 Training_loss_avg:0.343932
Epoch:8 Step:2000 Training_loss:0.449942 Training_loss_avg:0.348406
Epoch:8 Step:2008 Training_loss:0.397774 Training_loss_avg:0.348009
Epoch:8 Step:2016 Training_loss:0.325082 Training_loss_avg:0.347158
Epoch:8 Step:2024 Training_loss:0.623064 Training_loss_avg:0.355699
Epoch:8 Step:2032 Training_loss:0.628036 Training_loss_avg:0.363759
Epoch:8 Step:2040 Training_loss:0.183906 Training_loss_avg:0.359297
Epoch:8 Step:2048 Training_loss:0.527478 Training_loss_avg:0.364418
Epoch:8 Step:2056 Training_loss:0.243474 Training_loss_avg:0.362330
Epoch:8 Step:2064 Training_loss:0.290742 Training_loss_avg:0.361806
Epoch:8 Step:2072 Training_loss:0.660987 Training_loss_avg:0.367935
Epoch:8 Step:2080 Training_loss:0.335007 Training_loss_avg:0.369554
Epoch:8 Step:2088 Training_loss:0.396226 Training_loss_avg:0.362024
Epoch:8 Step:2096 Training_loss:0.447484 Training_loss_avg:0.363437
Epoch:8 Step

52it [00:07,  6.63it/s]


Epoch:8 Step:2232 Val_loss:0.850374
Epoch:8 Step:2240 Training_loss:0.245259 Training_loss_avg:0.357716
Epoch:8 Step:2248 Training_loss:0.602400 Training_loss_avg:0.366743
Epoch:8 Step:2256 Training_loss:0.117663 Training_loss_avg:0.361369
Epoch:8 Step:2264 Training_loss:0.390587 Training_loss_avg:0.362927
Epoch:8 Step:2272 Training_loss:0.120970 Training_loss_avg:0.356921
Epoch:8 Step:2280 Training_loss:0.486021 Training_loss_avg:0.356054
Epoch:8 Step:2288 Training_loss:0.205102 Training_loss_avg:0.345006
Epoch:8 Step:2296 Training_loss:0.801437 Training_loss_avg:0.357464
Epoch:8 Step:2304 Training_loss:0.528706 Training_loss_avg:0.359628
Epoch:8 Step:2312 Training_loss:0.188859 Training_loss_avg:0.357873
Epoch:8 Step:2320 Training_loss:0.184150 Training_loss_avg:0.358452
Epoch:8 Step:2328 Training_loss:0.328755 Training_loss_avg:0.358476
Epoch:8 Step:2336 Training_loss:0.179773 Training_loss_avg:0.357835
Epoch:8 Step:2344 Training_loss:0.310376 Training_loss_avg:0.355719
Epoch:8 Step

52it [00:07,  6.64it/s]


Epoch:8 Step:2480 Val_loss:0.880473
Epoch:8 Step:2488 Training_loss:0.452411 Training_loss_avg:0.377747
Epoch:8 Step:2496 Training_loss:0.367529 Training_loss_avg:0.376148
Epoch:8 Step:2504 Training_loss:0.687169 Training_loss_avg:0.382333
Epoch:8 Step:2512 Training_loss:0.789035 Training_loss_avg:0.388710
Epoch:8 Step:2520 Training_loss:0.341163 Training_loss_avg:0.389238
Epoch:8 Step:2528 Training_loss:0.379109 Training_loss_avg:0.391111
Epoch:8 Step:2536 Training_loss:0.440859 Training_loss_avg:0.393272
Epoch:8 Step:2544 Training_loss:0.398142 Training_loss_avg:0.398431
Epoch:8 Step:2552 Training_loss:0.150271 Training_loss_avg:0.391249
Epoch:8 Step:2560 Training_loss:0.196140 Training_loss_avg:0.386021
Epoch:8 Step:2568 Training_loss:0.319739 Training_loss_avg:0.386201
Epoch:8 Step:2576 Training_loss:0.252860 Training_loss_avg:0.387626
Epoch:8 Step:2584 Training_loss:0.426279 Training_loss_avg:0.381968
Epoch:8 Step:2592 Training_loss:0.123731 Training_loss_avg:0.379225
Epoch:8 Step

52it [00:07,  6.64it/s]


Epoch:8 Step:2728 Val_loss:0.785281
Epoch:8 Step:2736 Training_loss:0.423491 Training_loss_avg:0.402265
Epoch:8 Step:2744 Training_loss:0.429462 Training_loss_avg:0.404647
Epoch:8 Step:2752 Training_loss:0.354147 Training_loss_avg:0.402504
Epoch:8 Step:2760 Training_loss:0.494512 Training_loss_avg:0.402053
Epoch:8 Step:2768 Training_loss:0.331188 Training_loss_avg:0.393584
Epoch:8 Step:2776 Training_loss:0.310684 Training_loss_avg:0.388299
Epoch:8 Step:2784 Training_loss:0.099704 Training_loss_avg:0.382876
Epoch:8 Step:2792 Training_loss:0.509518 Training_loss_avg:0.387445
Epoch:8 Step:2800 Training_loss:0.512174 Training_loss_avg:0.385165
Epoch:8 Step:2808 Training_loss:0.234390 Training_loss_avg:0.387646
Epoch:8 Step:2816 Training_loss:0.318360 Training_loss_avg:0.382866
Epoch:8 Step:2824 Training_loss:0.374356 Training_loss_avg:0.379555
Epoch:8 Step:2832 Training_loss:0.183324 Training_loss_avg:0.378448
Epoch:8 Step:2840 Training_loss:0.044702 Training_loss_avg:0.376830
Epoch:8 Step

52it [00:07,  6.64it/s]


Epoch:8 Step:2976 Val_loss:1.035804
Epoch:8 Step:2984 Training_loss:0.547130 Training_loss_avg:0.364956
Epoch:8 Step:2992 Training_loss:0.624998 Training_loss_avg:0.374981
Epoch:8 Step:3000 Training_loss:0.483302 Training_loss_avg:0.379562
Epoch:8 Step:3008 Training_loss:0.284073 Training_loss_avg:0.371048
Epoch:8 Step:3016 Training_loss:0.472959 Training_loss_avg:0.374878
Epoch:8 Step:3024 Training_loss:0.455592 Training_loss_avg:0.377040
Epoch:8 Step:3032 Training_loss:0.425228 Training_loss_avg:0.377405
Epoch:8 Step:3040 Training_loss:0.365115 Training_loss_avg:0.376962
Epoch:8 Step:3048 Training_loss:0.271569 Training_loss_avg:0.376562
Epoch:8 Step:3056 Training_loss:0.313613 Training_loss_avg:0.377984
Epoch:8 Step:3064 Training_loss:0.409777 Training_loss_avg:0.376443
Epoch:8 Step:3072 Training_loss:0.464735 Training_loss_avg:0.382030
Epoch:8 Step:3080 Training_loss:0.280740 Training_loss_avg:0.372898
Epoch:8 Step:3088 Training_loss:0.217319 Training_loss_avg:0.368939
Epoch:8 Step

52it [00:07,  6.63it/s]


Epoch:8 Step:3224 Val_loss:0.893976
Epoch:8 Step:3232 Training_loss:0.450923 Training_loss_avg:0.379493
Epoch:8 Step:3240 Training_loss:0.340673 Training_loss_avg:0.385412
Epoch:8 Step:3248 Training_loss:0.562694 Training_loss_avg:0.388973
Epoch:8 Step:3256 Training_loss:0.073456 Training_loss_avg:0.383461
Epoch:8 Step:3264 Training_loss:0.310468 Training_loss_avg:0.383030
Epoch:8 Step:3272 Training_loss:0.492000 Training_loss_avg:0.386926
Epoch:8 Step:3280 Training_loss:0.153842 Training_loss_avg:0.380421
Epoch:8 Step:3288 Training_loss:0.567179 Training_loss_avg:0.386122
Epoch:8 Step:3296 Training_loss:0.262991 Training_loss_avg:0.385175
Epoch:8 Step:3304 Training_loss:0.198333 Training_loss_avg:0.386985
Epoch:8 Step:3312 Training_loss:0.343912 Training_loss_avg:0.392329
Epoch:8 Step:3320 Training_loss:0.179056 Training_loss_avg:0.382276
Epoch:8 Step:3328 Training_loss:0.207689 Training_loss_avg:0.377325
Epoch:9 Step:0 Training_loss:0.315604 Training_loss_avg:0.380957
Validating:


52it [00:07,  6.63it/s]


Epoch:9 Step:0 Val_loss:0.780471
Epoch:9 Step:8 Training_loss:0.676836 Training_loss_avg:0.381447
Epoch:9 Step:16 Training_loss:0.186094 Training_loss_avg:0.379805
Epoch:9 Step:24 Training_loss:0.296375 Training_loss_avg:0.383624
Epoch:9 Step:32 Training_loss:0.208537 Training_loss_avg:0.373650
Epoch:9 Step:40 Training_loss:0.233231 Training_loss_avg:0.370141
Epoch:9 Step:48 Training_loss:0.428535 Training_loss_avg:0.367770
Epoch:9 Step:56 Training_loss:0.192228 Training_loss_avg:0.359114
Epoch:9 Step:64 Training_loss:0.851291 Training_loss_avg:0.366474
Epoch:9 Step:72 Training_loss:0.375014 Training_loss_avg:0.368293
Epoch:9 Step:80 Training_loss:0.131472 Training_loss_avg:0.361463
Epoch:9 Step:88 Training_loss:0.226164 Training_loss_avg:0.356874
Epoch:9 Step:96 Training_loss:0.269163 Training_loss_avg:0.353753
Epoch:9 Step:104 Training_loss:0.266586 Training_loss_avg:0.351783
Epoch:9 Step:112 Training_loss:0.075956 Training_loss_avg:0.347870
Epoch:9 Step:120 Training_loss:0.205577 Tr

52it [00:07,  6.63it/s]


Epoch:9 Step:248 Val_loss:0.963283
Epoch:9 Step:256 Training_loss:0.091607 Training_loss_avg:0.290845
Epoch:9 Step:264 Training_loss:0.149288 Training_loss_avg:0.289089
Epoch:9 Step:272 Training_loss:0.477556 Training_loss_avg:0.296423
Epoch:9 Step:280 Training_loss:0.120879 Training_loss_avg:0.294027
Epoch:9 Step:288 Training_loss:0.417981 Training_loss_avg:0.298391
Epoch:9 Step:296 Training_loss:0.173021 Training_loss_avg:0.292833
Epoch:9 Step:304 Training_loss:0.583159 Training_loss_avg:0.297682
Epoch:9 Step:312 Training_loss:0.427227 Training_loss_avg:0.294973
Epoch:9 Step:320 Training_loss:0.242060 Training_loss_avg:0.298345
Epoch:9 Step:328 Training_loss:0.283402 Training_loss_avg:0.297804
Epoch:9 Step:336 Training_loss:0.034232 Training_loss_avg:0.288648
Epoch:9 Step:344 Training_loss:0.152067 Training_loss_avg:0.288613
Epoch:9 Step:352 Training_loss:0.309827 Training_loss_avg:0.283466
Epoch:9 Step:360 Training_loss:0.388409 Training_loss_avg:0.285974
Epoch:9 Step:368 Training_l

52it [00:07,  6.63it/s]


Epoch:9 Step:496 Val_loss:1.017160
Epoch:9 Step:504 Training_loss:0.288805 Training_loss_avg:0.267846
Epoch:9 Step:512 Training_loss:0.084434 Training_loss_avg:0.268016
Epoch:9 Step:520 Training_loss:0.387333 Training_loss_avg:0.271651
Epoch:9 Step:528 Training_loss:0.468320 Training_loss_avg:0.275406
Epoch:9 Step:536 Training_loss:0.797514 Training_loss_avg:0.278939
Epoch:9 Step:544 Training_loss:0.170761 Training_loss_avg:0.278447
Epoch:9 Step:552 Training_loss:0.125849 Training_loss_avg:0.273753
Epoch:9 Step:560 Training_loss:0.072384 Training_loss_avg:0.270504
Epoch:9 Step:568 Training_loss:0.319235 Training_loss_avg:0.271585
Epoch:9 Step:576 Training_loss:0.483460 Training_loss_avg:0.270925
Epoch:9 Step:584 Training_loss:0.245293 Training_loss_avg:0.271912
Epoch:9 Step:592 Training_loss:0.422737 Training_loss_avg:0.274881
Epoch:9 Step:600 Training_loss:0.968109 Training_loss_avg:0.290358
Epoch:9 Step:608 Training_loss:0.166240 Training_loss_avg:0.288239
Epoch:9 Step:616 Training_l

52it [00:07,  6.64it/s]


Epoch:9 Step:744 Val_loss:0.941568
Epoch:9 Step:752 Training_loss:0.413516 Training_loss_avg:0.319145
Epoch:9 Step:760 Training_loss:0.180580 Training_loss_avg:0.314989
Epoch:9 Step:768 Training_loss:0.138502 Training_loss_avg:0.312587
Epoch:9 Step:776 Training_loss:0.347229 Training_loss_avg:0.315068
Epoch:9 Step:784 Training_loss:0.429574 Training_loss_avg:0.319851
Epoch:9 Step:792 Training_loss:0.123897 Training_loss_avg:0.317056
Epoch:9 Step:800 Training_loss:0.237544 Training_loss_avg:0.317372
Epoch:9 Step:808 Training_loss:0.285907 Training_loss_avg:0.317978
Epoch:9 Step:816 Training_loss:0.103034 Training_loss_avg:0.317889
Epoch:9 Step:824 Training_loss:0.399642 Training_loss_avg:0.315645
Epoch:9 Step:832 Training_loss:0.251707 Training_loss_avg:0.318744
Epoch:9 Step:840 Training_loss:0.676584 Training_loss_avg:0.322937
Epoch:9 Step:848 Training_loss:0.741887 Training_loss_avg:0.331010
Epoch:9 Step:856 Training_loss:0.187445 Training_loss_avg:0.331545
Epoch:9 Step:864 Training_l

52it [00:07,  6.63it/s]


Epoch:9 Step:992 Val_loss:0.934359
Epoch:9 Step:1000 Training_loss:0.346031 Training_loss_avg:0.316050
Epoch:9 Step:1008 Training_loss:0.312775 Training_loss_avg:0.318981
Epoch:9 Step:1016 Training_loss:0.110657 Training_loss_avg:0.315092
Epoch:9 Step:1024 Training_loss:0.351732 Training_loss_avg:0.318929
Epoch:9 Step:1032 Training_loss:0.618011 Training_loss_avg:0.326854
Epoch:9 Step:1040 Training_loss:0.290389 Training_loss_avg:0.325262
Epoch:9 Step:1048 Training_loss:0.345357 Training_loss_avg:0.329678
Epoch:9 Step:1056 Training_loss:0.587628 Training_loss_avg:0.328842
Epoch:9 Step:1064 Training_loss:0.471639 Training_loss_avg:0.332156
Epoch:9 Step:1072 Training_loss:0.152131 Training_loss_avg:0.330914
Epoch:9 Step:1080 Training_loss:0.328450 Training_loss_avg:0.328573
Epoch:9 Step:1088 Training_loss:0.419604 Training_loss_avg:0.333957
Epoch:9 Step:1096 Training_loss:0.503148 Training_loss_avg:0.338523
Epoch:9 Step:1104 Training_loss:0.182910 Training_loss_avg:0.334048
Epoch:9 Step:

52it [00:07,  6.63it/s]


Epoch:9 Step:1240 Val_loss:0.870704
Epoch:9 Step:1248 Training_loss:1.073068 Training_loss_avg:0.331661
Epoch:9 Step:1256 Training_loss:0.208133 Training_loss_avg:0.332075
Epoch:9 Step:1264 Training_loss:0.195792 Training_loss_avg:0.326962
Epoch:9 Step:1272 Training_loss:0.257735 Training_loss_avg:0.326551
Epoch:9 Step:1280 Training_loss:0.114055 Training_loss_avg:0.326105
Epoch:9 Step:1288 Training_loss:0.294054 Training_loss_avg:0.330302
Epoch:9 Step:1296 Training_loss:0.401970 Training_loss_avg:0.331983
Epoch:9 Step:1304 Training_loss:0.402175 Training_loss_avg:0.335743
Epoch:9 Step:1312 Training_loss:0.142664 Training_loss_avg:0.328344
Epoch:9 Step:1320 Training_loss:0.196199 Training_loss_avg:0.314969
Epoch:9 Step:1328 Training_loss:0.142325 Training_loss_avg:0.312387
Epoch:9 Step:1336 Training_loss:0.412735 Training_loss_avg:0.318889
Epoch:9 Step:1344 Training_loss:0.670516 Training_loss_avg:0.326584
Epoch:9 Step:1352 Training_loss:0.312518 Training_loss_avg:0.329246
Epoch:9 Step

52it [00:07,  6.63it/s]


Epoch:9 Step:1488 Val_loss:0.935730
Epoch:9 Step:1496 Training_loss:0.836965 Training_loss_avg:0.317034
Epoch:9 Step:1504 Training_loss:0.346146 Training_loss_avg:0.320299
Epoch:9 Step:1512 Training_loss:0.154696 Training_loss_avg:0.319296
Epoch:9 Step:1520 Training_loss:0.224025 Training_loss_avg:0.320314
Epoch:9 Step:1528 Training_loss:0.232985 Training_loss_avg:0.315993
Epoch:9 Step:1536 Training_loss:0.291789 Training_loss_avg:0.315019
Epoch:9 Step:1544 Training_loss:0.300612 Training_loss_avg:0.313016
Epoch:9 Step:1552 Training_loss:0.393059 Training_loss_avg:0.313465
Epoch:9 Step:1560 Training_loss:0.206146 Training_loss_avg:0.310465
Epoch:9 Step:1568 Training_loss:0.364252 Training_loss_avg:0.314427
Epoch:9 Step:1576 Training_loss:0.192620 Training_loss_avg:0.311839
Epoch:9 Step:1584 Training_loss:0.228867 Training_loss_avg:0.314278
Epoch:9 Step:1592 Training_loss:0.458104 Training_loss_avg:0.316474
Epoch:9 Step:1600 Training_loss:0.210344 Training_loss_avg:0.315698
Epoch:9 Step

52it [00:07,  6.63it/s]


Epoch:9 Step:1736 Val_loss:0.939318
Epoch:9 Step:1744 Training_loss:0.144354 Training_loss_avg:0.299087
Epoch:9 Step:1752 Training_loss:0.063737 Training_loss_avg:0.294111
Epoch:9 Step:1760 Training_loss:0.143801 Training_loss_avg:0.286071
Epoch:9 Step:1768 Training_loss:0.386758 Training_loss_avg:0.290965
Epoch:9 Step:1776 Training_loss:0.211807 Training_loss_avg:0.287915
Epoch:9 Step:1784 Training_loss:0.444303 Training_loss_avg:0.293045
Epoch:9 Step:1792 Training_loss:0.498579 Training_loss_avg:0.296637
Epoch:9 Step:1800 Training_loss:0.313781 Training_loss_avg:0.300155
Epoch:9 Step:1808 Training_loss:0.306730 Training_loss_avg:0.302694
Epoch:9 Step:1816 Training_loss:0.265878 Training_loss_avg:0.299689
Epoch:9 Step:1824 Training_loss:0.215566 Training_loss_avg:0.301507
Epoch:9 Step:1832 Training_loss:0.381407 Training_loss_avg:0.299899
Epoch:9 Step:1840 Training_loss:0.071892 Training_loss_avg:0.291211
Epoch:9 Step:1848 Training_loss:0.268137 Training_loss_avg:0.294540
Epoch:9 Step

52it [00:07,  6.63it/s]


Epoch:9 Step:1984 Val_loss:0.943098
Epoch:9 Step:1992 Training_loss:0.251670 Training_loss_avg:0.280027
Epoch:9 Step:2000 Training_loss:0.709312 Training_loss_avg:0.290006
Epoch:9 Step:2008 Training_loss:0.368444 Training_loss_avg:0.291991
Epoch:9 Step:2016 Training_loss:0.225472 Training_loss_avg:0.294862
Epoch:9 Step:2024 Training_loss:0.281228 Training_loss_avg:0.294897
Epoch:9 Step:2032 Training_loss:0.470850 Training_loss_avg:0.295772
Epoch:9 Step:2040 Training_loss:0.232435 Training_loss_avg:0.294866
Epoch:9 Step:2048 Training_loss:0.466558 Training_loss_avg:0.298141
Epoch:9 Step:2056 Training_loss:0.551397 Training_loss_avg:0.303404
Epoch:9 Step:2064 Training_loss:0.416808 Training_loss_avg:0.306152
Epoch:9 Step:2072 Training_loss:0.156637 Training_loss_avg:0.304608
Epoch:9 Step:2080 Training_loss:0.557640 Training_loss_avg:0.307967
Epoch:9 Step:2088 Training_loss:0.373341 Training_loss_avg:0.309784
Epoch:9 Step:2096 Training_loss:0.689697 Training_loss_avg:0.320741
Epoch:9 Step

52it [00:07,  6.63it/s]


Epoch:9 Step:2232 Val_loss:0.888779
Epoch:9 Step:2240 Training_loss:0.170123 Training_loss_avg:0.317205
Epoch:9 Step:2248 Training_loss:0.301484 Training_loss_avg:0.317872
Epoch:9 Step:2256 Training_loss:0.092552 Training_loss_avg:0.312417
Epoch:9 Step:2264 Training_loss:0.508907 Training_loss_avg:0.317462
Epoch:9 Step:2272 Training_loss:0.171326 Training_loss_avg:0.316542
Epoch:9 Step:2280 Training_loss:0.493223 Training_loss_avg:0.322031
Epoch:9 Step:2288 Training_loss:0.256519 Training_loss_avg:0.314500
Epoch:9 Step:2296 Training_loss:0.433859 Training_loss_avg:0.321358
Epoch:9 Step:2304 Training_loss:0.669338 Training_loss_avg:0.331339
Epoch:9 Step:2312 Training_loss:0.239114 Training_loss_avg:0.332257
Epoch:9 Step:2320 Training_loss:0.164339 Training_loss_avg:0.327233
Epoch:9 Step:2328 Training_loss:0.105799 Training_loss_avg:0.325974
Epoch:9 Step:2336 Training_loss:0.227771 Training_loss_avg:0.323926
Epoch:9 Step:2344 Training_loss:0.825679 Training_loss_avg:0.331001
Epoch:9 Step

52it [00:07,  6.63it/s]


Epoch:9 Step:2480 Val_loss:0.900492
Epoch:9 Step:2488 Training_loss:0.283950 Training_loss_avg:0.337869
Epoch:9 Step:2496 Training_loss:0.557569 Training_loss_avg:0.335227
Epoch:9 Step:2504 Training_loss:0.428435 Training_loss_avg:0.342021
Epoch:9 Step:2512 Training_loss:0.236593 Training_loss_avg:0.337924
Epoch:9 Step:2520 Training_loss:0.211214 Training_loss_avg:0.334110
Epoch:9 Step:2528 Training_loss:0.487243 Training_loss_avg:0.340430
Epoch:9 Step:2536 Training_loss:0.090755 Training_loss_avg:0.340238
Epoch:9 Step:2544 Training_loss:0.363039 Training_loss_avg:0.339341
Epoch:9 Step:2552 Training_loss:0.603552 Training_loss_avg:0.344952
Epoch:9 Step:2560 Training_loss:0.324962 Training_loss_avg:0.350562
Epoch:9 Step:2568 Training_loss:0.381989 Training_loss_avg:0.340192
Epoch:9 Step:2576 Training_loss:0.350231 Training_loss_avg:0.345142
Epoch:9 Step:2584 Training_loss:0.477880 Training_loss_avg:0.347972
Epoch:9 Step:2592 Training_loss:0.309146 Training_loss_avg:0.348831
Epoch:9 Step

52it [00:07,  6.64it/s]


Epoch:9 Step:2728 Val_loss:0.868331
Epoch:9 Step:2736 Training_loss:0.783048 Training_loss_avg:0.369114
Epoch:9 Step:2744 Training_loss:0.463675 Training_loss_avg:0.361874
Epoch:9 Step:2752 Training_loss:0.533964 Training_loss_avg:0.362149
Epoch:9 Step:2760 Training_loss:0.397873 Training_loss_avg:0.365172
Epoch:9 Step:2768 Training_loss:0.403498 Training_loss_avg:0.361862
Epoch:9 Step:2776 Training_loss:0.252687 Training_loss_avg:0.359155
Epoch:9 Step:2784 Training_loss:0.462970 Training_loss_avg:0.364548
Epoch:9 Step:2792 Training_loss:0.154376 Training_loss_avg:0.360917
Epoch:9 Step:2800 Training_loss:0.269746 Training_loss_avg:0.361591
Epoch:9 Step:2808 Training_loss:0.379583 Training_loss_avg:0.361071
Epoch:9 Step:2816 Training_loss:0.503162 Training_loss_avg:0.363318
Epoch:9 Step:2824 Training_loss:0.289208 Training_loss_avg:0.366082
Epoch:9 Step:2832 Training_loss:0.153327 Training_loss_avg:0.360794
Epoch:9 Step:2840 Training_loss:0.148943 Training_loss_avg:0.357096
Epoch:9 Step

52it [00:07,  6.64it/s]


Epoch:9 Step:2976 Val_loss:1.000979
Epoch:9 Step:2984 Training_loss:0.314972 Training_loss_avg:0.342199
Epoch:9 Step:2992 Training_loss:0.172112 Training_loss_avg:0.339458
Epoch:9 Step:3000 Training_loss:0.401450 Training_loss_avg:0.334917
Epoch:9 Step:3008 Training_loss:0.222571 Training_loss_avg:0.334530
Epoch:9 Step:3016 Training_loss:0.500930 Training_loss_avg:0.335684
Epoch:9 Step:3024 Training_loss:0.521555 Training_loss_avg:0.342896
Epoch:9 Step:3032 Training_loss:0.166499 Training_loss_avg:0.337035
Epoch:9 Step:3040 Training_loss:0.379225 Training_loss_avg:0.339626
Epoch:9 Step:3048 Training_loss:0.271932 Training_loss_avg:0.340628
Epoch:9 Step:3056 Training_loss:0.535706 Training_loss_avg:0.346696
Epoch:9 Step:3064 Training_loss:0.205794 Training_loss_avg:0.343328
Epoch:9 Step:3072 Training_loss:0.622270 Training_loss_avg:0.350836
Epoch:9 Step:3080 Training_loss:0.080728 Training_loss_avg:0.347972
Epoch:9 Step:3088 Training_loss:0.376027 Training_loss_avg:0.345667
Epoch:9 Step

52it [00:07,  6.63it/s]


Epoch:9 Step:3224 Val_loss:0.921827
Epoch:9 Step:3232 Training_loss:0.487739 Training_loss_avg:0.331065
Epoch:9 Step:3240 Training_loss:0.428777 Training_loss_avg:0.336661
Epoch:9 Step:3248 Training_loss:0.204582 Training_loss_avg:0.336748
Epoch:9 Step:3256 Training_loss:0.387032 Training_loss_avg:0.338654
Epoch:9 Step:3264 Training_loss:0.163260 Training_loss_avg:0.337282
Epoch:9 Step:3272 Training_loss:0.291582 Training_loss_avg:0.338721
Epoch:9 Step:3280 Training_loss:0.446353 Training_loss_avg:0.343341
Epoch:9 Step:3288 Training_loss:0.829804 Training_loss_avg:0.352873
Epoch:9 Step:3296 Training_loss:0.667654 Training_loss_avg:0.356335
Epoch:9 Step:3304 Training_loss:0.663269 Training_loss_avg:0.362028
Epoch:9 Step:3312 Training_loss:0.566651 Training_loss_avg:0.366491
Epoch:9 Step:3320 Training_loss:0.240226 Training_loss_avg:0.367521
Epoch:9 Step:3328 Training_loss:0.755585 Training_loss_avg:0.374802
