In [2]:
%cd drive/My Drive/CS5814/HW3


/content/drive/My Drive/CS5814/HW3


In [45]:
pip install transformers




In [46]:
pip install xlsxwriter


Collecting xlsxwriter
  Downloading XlsxWriter-3.0.3-py3-none-any.whl (149 kB)
[?25l[K     |██▏                             | 10 kB 30.6 MB/s eta 0:00:01[K     |████▍                           | 20 kB 9.0 MB/s eta 0:00:01[K     |██████▌                         | 30 kB 7.8 MB/s eta 0:00:01[K     |████████▊                       | 40 kB 3.4 MB/s eta 0:00:01[K     |███████████                     | 51 kB 3.5 MB/s eta 0:00:01[K     |█████████████                   | 61 kB 4.1 MB/s eta 0:00:01[K     |███████████████▎                | 71 kB 4.3 MB/s eta 0:00:01[K     |█████████████████▌              | 81 kB 4.7 MB/s eta 0:00:01[K     |███████████████████▋            | 92 kB 5.2 MB/s eta 0:00:01[K     |█████████████████████▉          | 102 kB 4.1 MB/s eta 0:00:01[K     |████████████████████████        | 112 kB 4.1 MB/s eta 0:00:01[K     |██████████████████████████▏     | 122 kB 4.1 MB/s eta 0:00:01[K     |████████████████████████████▍   | 133 kB 4.1 MB/s eta 0:00:01

In [58]:
import json
import os
import random
import gc
import xlsxwriter
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
from tqdm import tqdm
from transformers import AdamW, AutoTokenizer, AutoModelForSequenceClassification
from transformers import RobertaTokenizer, RobertaConfig, RobertaModel
import pickle

import torch.nn as nn

torch.cuda.empty_cache()
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print(device)


cuda:0


In [5]:
def preprocess_data(file_loc='code_dataset.jsonl', generate_histogram=False):
    """
    Loads and processing the jsonl file,

    :param file_loc: location of target jsonl file
    :param generate_histogram: Flag to display histogram of function lengths
    :return: dataframe of preprocessed jsons
    """

    with open(file_loc, 'r') as json_file:
        json_list = list(json_file)

    code_list = []
    for json_str in json_list:
        result = json.loads(json_str)
        code_list.append(result)

    code_df = pd.DataFrame(code_list)

    total = code_df['target'].sum()
    proportion = total / code_df.shape[0]

    print("Insecure code counts: {}, Total code counts: {}, Proportion {}".format(total, code_df.shape[0], proportion))

    if generate_histogram:
        plt.hist(code_df['func'].str.len(), bins=100)
        plt.show()

    return code_df

In [6]:


def split_data(input_data, attention_data, label_data, train_ratio=0.8, val_ratio=0.10, max_len=512):
    """
    Splits data in accordance with provdied ratios, additionally discards functions with > max_len tokens
        as these will not be processed by the model will (can truncate, yet may truncate the error in the code)

    :param input_data: input functions
    :param attention_data: attention map
    :param label_data: target labels
    :param train_ratio: ratio of data to train on
    :param val_ratio: ratio of data to validate with (test is inferred from this and train)
    :param max_len: max number of tokens allowed for training date

    :return: 3 tuples for train val and test containing (input, attention, target)
    """
    # Removing excessively long elements from dataset
    valid_token_index = [i for i in range(len(input_data)) if len(input_data[i]) <= max_len]
    X_data = np.array(input_data)[valid_token_index]
    A_data = np.array(attention_data)[valid_token_index]
    Y_data = np.array(label_data)[valid_token_index]

    dataset_size = len(X_data)

    # Determining index to split dataset
    random_id = random.sample(range(dataset_size), dataset_size)
    train_split_id = int(train_ratio * dataset_size)
    val_split_id = int((train_ratio + val_ratio) * dataset_size)

    train_ids = random_id[:train_split_id]
    val_ids = random_id[train_split_id:val_split_id]
    test_ids = random_id[val_split_id:]

    X_train = torch.tensor(list(X_data[train_ids]))
    A_train = torch.tensor(list(A_data[train_ids]))
    Y_train = torch.tensor(list(Y_data[train_ids]))

    X_val = torch.tensor(list(X_data[val_ids]))
    A_val = torch.tensor(list(A_data[val_ids]))
    Y_val = torch.tensor(list(Y_data[val_ids]))

    X_test = torch.tensor(list(X_data[test_ids]))
    A_test = torch.tensor(list(A_data[test_ids]))
    Y_test = torch.tensor(list(Y_data[test_ids]))

    return (X_train, A_train, Y_train), (X_val, A_val, Y_val), (X_test, A_test, Y_test)



In [75]:

def tokenize(code_df, model_name='codebert-base'):
    """
    Apply the tokenizer from the huggingface pretrained model

    :param code_df: dataframe of preprocess code (from jsonl)
    :param model_name: model name (targeting local install)
    :return: 3 tuples for train val and test containing (input, attention, target)
    """
    tokenizer = AutoTokenizer.from_pretrained(model_name)

    inputs = tokenizer(code_df['func'].tolist(), truncation=False, padding='max_length', max_length=512)

    input_data = inputs['input_ids']
    attention_data = inputs['attention_mask']
    label_data = torch.tensor(code_df['target'].tolist())  # TODO - this can be directly converted to a np array

    print("Data points: {}".format(len(input_data)))

    return split_data(input_data, attention_data, label_data, max_len=512)



In [15]:
def tokenize_256(code_df, model_name='codebert-base'):
    """
    Apply the tokenizer from the huggingface pretrained model

    :param code_df: dataframe of preprocess code (from jsonl)
    :param model_name: model name (targeting local install)
    :return: 3 tuples for train val and test containing (input, attention, target)
    """
    tokenizer = RobertaTokenizer.from_pretrained(model_name)

    inputs = tokenizer.batch_encode_plus(code_df['func'].tolist(), truncation=False)

    input_data = inputs['input_ids']
    attention_data = inputs['attention_mask']
    label_data = torch.tensor(code_df['target'].tolist())  # TODO - this can be directly converted to a np array

    pruned_data = list(np.array(code_df['func'])[[i for i in range(len(input_data)) if len(input_data[i]) <= 256]])

    inputs = tokenizer.batch_encode_plus(pruned_data, truncation=False, padding='max_length', max_length=256)
    input_data = inputs['input_ids']
    attention_data = inputs['attention_mask']
    label_data = torch.tensor(code_df['target'].tolist())  # TODO - this can be directly converted to a np array


    return split_data(input_data, attention_data, label_data, max_len=512)


In [164]:

def train(model, train_data, val_data, epochs=5, batch_size=16, learning_rate=2e-5, validate_per=500,
          run_name="temp", run_descrption=None):
    """
    Main fine-tuning training loop for the provided model

    :param model: model loaded with predefined weights
    :param train_data: tuple of X_train, A_train, Y_train (X = inputs, A = attention, Y = target)
    :param val_data: tuple X_val, A_val, Y_val
    :param epochs: number of epochs for training
    :param batch_size: batch size (see note below about batch_hack)
    :param learning_rate: optimizer learning rate
    :param validate_per: number of weight updates before validation occurs
                            (notes: - if batch_size = 32, and validate_per = 32, validation will occur every batch
                                    - this is wrt the start of each epoch
                                    - validation will always occour at the start of each epoch (step 0))
    :param run_name: name used to saving checkpoints and log files within codebert_finetune_runs
    :param run_descrption: string that is saved to info.txt describing the run


    :return: None (models are saved in checkpoints along with log data)
    """


    # Saving run description.txt
    if run_descrption is not None:
        with open("{}/info.txt".format(dir_name), "a+") as f:
            f.write(run_descrption)

    # Unpacking data
    X_train, A_train, Y_train = train_data
    X_val, A_val, Y_val = val_data


    batch_hack = batch_size  # See note below regarding limited GPU memory

    # Initializing arrays for tracking loss
    train_loss_hist = []
    val_loss_hist = []
    train_pred_hist = []
    # Counter to track batches (see note below related to GPU memory)
    batch_count = 0
    # validate_per_batch = int(validate_per/batch_hack)

    # Moving model to GPU if configured
    model = model.to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = AdamW(model.parameters(), lr=learning_rate)

    validate_per = int(validate_per/batch_size)

    for epoch in range(epochs):

        # Generating random index for manual shuffling of data each epoch as note using DataLoaders
        permutation = torch.randperm(X_train.shape[0])

        # Note here that only a single element is loaded at each iteration (batch size = 1) due to GPU memory constraint
        for batch_id, i in enumerate(range(0, X_train.shape[0], batch_hack)):

            # Loading batch and moving to device
            indices = permutation[i:i + batch_hack]
            batch_X, batch_Y, batch_A = X_train[indices].to(device), Y_train[indices].to(device), A_train[indices].to(device)

            model.train()

            # Forward pass
            outputs = model(batch_X,labels=batch_Y, attention_mask=batch_A)
            #loss = criterion(loss_clsf.float(), batch_Y_one_hot.float())
            loss = outputs.loss
            #print(loss)

            # Backward pass
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()

            loss_clsf = nn.Softmax(dim=1)(outputs.logits)
            acc = np.average(torch.eq(batch_Y.cpu(), loss_clsf.argmax(axis=1).cpu()))
            #rint(correct)

            # Tracking loss
            train_loss_hist.append(float(loss.item()))
            train_pred_hist.append(acc)

            # Training output
            train_output = "Epoch:{} Step:{} Training_loss:{:.6f}, Acc_avg:{:.2f}%".format(epoch, i, loss.item(), np.sum(100*train_pred_hist[-50:])/min(len(train_pred_hist), 50))
            print(train_output+" Training_loss_avg:{:.6f}".format(np.average(train_loss_hist[-50:])))
            with open("{}/train_loss.txt".format(dir_name), "a+") as f:
                f.write(train_output+"\n")

            # Validation
            if batch_id % validate_per == 0:
                val_loss_total = 0
                model.eval()
                print("Validating:")
                val_acc = []
                for val_badtch_id, j in tqdm(enumerate(range(0, X_val.shape[0], batch_hack))):
                    # Loading singular validation data (overwrites train data as can only load 1 intp GPU)
                    batch_X, batch_Y, batch_A = X_val[j:j+batch_hack].to(device), Y_val[j:j+batch_hack].to(device), A_val[j:j+batch_hack].to(device)

                    with torch.no_grad():
                        val_outputs = model(batch_X, labels=batch_Y, attention_mask=batch_A)
                    val_loss_total += float(val_outputs['loss'].item())

                    
                    val_clsf = nn.Softmax(dim=1)(val_outputs.logits)
                    val_acc.append(np.average(torch.eq(batch_Y.cpu(), val_clsf.argmax(axis=1).cpu())))

                    del batch_X
                    del batch_Y

                # Adding average loss to tracker
                val_average = val_loss_total / (val_badtch_id+1)
                val_loss_hist.append(val_average)

                # Validation output and logging
                val_output = "Epoch:{} Step:{} Val_loss:{:.6f}, Val_Acc_avg:{:.2f}%".format(epoch, i, val_average, np.sum(100*val_acc[-50:])/min(len(val_acc), 50))
                print(val_output)
                with open("{}/val_los.txt".format(dir_name), "a+") as f:
                    f.write(val_output+"\n")

        # End of epoch checkpoint
        model.save_pretrained("{}/epoch_{}".format(dir_name, epoch + 1))



In [None]:
def main():
    """
    Main configuration function for a given finetune run
    :return: None
    """

    run_name = "test_colab_GPU_highRAM_8_val_8"
    model_name = 'codebert-base'
    checkpoint_location = None
    online = False

    code_df = preprocess_data(file_loc='code_dataset.jsonl')
    train_data, val_data, test_data = tokenize(code_df, model_name=model_name)

    # Loading model from checkpoint if location provided
    if online:
        model = AutoModelForSequenceClassification.from_pretrained("microsoft/codebert-base")
    elif checkpoint_location is None:
        model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)
    else:
        model = AutoModelForSequenceClassification.from_pretrained(checkpoint_location)

    train(model=model,
          train_data=train_data,
          val_data=val_data,
          epochs=5,
          batch_size=8,
          learning_rate=2e-5,
          validate_per=100,
          run_name=run_name,
          run_descrption="Colab with highRam, lr=2e-5, validate per 100, batch 16")


In [None]:
gc.collect()
torch.cuda.empty_cache()
main()

In [None]:
def main():
    """
    Main configuration function for a given finetune run
    :return: None
    """

    run_name = "test_colab_GPU_highRAM_8_lr_5e-6"
    model_name = 'codebert-base'
    checkpoint_location = None
    online = False

    code_df = preprocess_data(file_loc='code_dataset.jsonl')
    train_data, val_data, test_data = tokenize(code_df, model_name=model_name)

    # Loading model from checkpoint if location provided
    if online:
        model = AutoModelForSequenceClassification.from_pretrained("microsoft/codebert-base")
    elif checkpoint_location is None:
        model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)
    else:
        model = AutoModelForSequenceClassification.from_pretrained(checkpoint_location)

    train(model=model,
          train_data=train_data,
          val_data=val_data,
          epochs=5,
          batch_size=8,
          learning_rate=5e-6,
          validate_per=250,
          run_name=run_name,
          run_descrption="Colab with highRam, lr=5e-6, validate per 259, batch 8")


In [None]:
gc.collect()
torch.cuda.empty_cache()
main()

In [None]:
def main():
    """
    Main configuration function for a given finetune run
    :return: None
    """

    run_name = "test_colab_GPU_highRAM_8_lr_5e-6_10epochs"
    model_name = 'codebert-base'
    checkpoint_location = None
    online = False

    code_df = preprocess_data(file_loc='code_dataset.jsonl')
    train_data, val_data, test_data = tokenize(code_df, model_name=model_name)

    # Loading model from checkpoint if location provided
    if online:
        model = AutoModelForSequenceClassification.from_pretrained("microsoft/codebert-base")
    elif checkpoint_location is None:
        model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)
    else:
        model = AutoModelForSequenceClassification.from_pretrained(checkpoint_location)

    train(model=model,
          train_data=train_data,
          val_data=val_data,
          epochs=10,
          batch_size=8,
          learning_rate=5e-6,
          validate_per=250,
          run_name=run_name,
          run_descrption="Colab with highRam, lr=5e-6, validate per 250, batch 8, 10epochs")


In [None]:
gc.collect()
torch.cuda.empty_cache()
main()

In [None]:
def main():
    """
    Main configuration function for a given finetune run
    :return: None
    """

    run_name = "test_colab_GPU_highRAM_very__verylow_very_lr_(5e-7)"
    model_name = 'codebert-base'
    checkpoint_location = '/content/drive/MyDrive/CS5814/HW3/codebert_finetune_runs/test_colab_GPU_highRAM_8_lr_5e-6/epoch_5'
    online = False

    code_df = preprocess_data(file_loc='code_dataset.jsonl')
    train_data, val_data, test_data = tokenize(code_df, model_name=model_name)

    # Loading model from checkpoint if location provided
    if online:
        model = AutoModelForSequenceClassification.from_pretrained("microsoft/codebert-base")
    elif checkpoint_location is None:
        model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)
    else:
        model = AutoModelForSequenceClassification.from_pretrained(checkpoint_location)

    train(model=model,
          train_data=train_data,
          val_data=val_data,
          epochs=5,
          batch_size=8,
          learning_rate=5e-6,
          validate_per=250,
          run_name=run_name,
          run_descrption="Colab with highRam, lr=5e-7, validate per 250, batch 8, 5epochs")


In [None]:
gc.collect()
torch.cuda.empty_cache()
main()

In [22]:
def main():
    """
    Main configuration function for a given finetune run
    :return: None
    """

    run_name = "Softmax + CEL_3e-6 acc 3 good?"
    model_name = 'codebert-base'
    checkpoint_location = None
    online = False

    code_df = preprocess_data(file_loc='code_dataset.jsonl')
    train_data, val_data, test_data = tokenize(code_df, model_name=model_name)

    # Loading model from checkpoint if location provided
    if online:
        model = AutoModelForSequenceClassification.from_pretrained("microsoft/codebert-base")
    elif checkpoint_location is None:
        model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)
    else:
        model = AutoModelForSequenceClassification.from_pretrained(checkpoint_location)

    train(model=model,
          train_data=train_data,
          val_data=val_data,
          epochs=5,
          batch_size=8,
          learning_rate=3e-6,
          validate_per=250,
          run_name=run_name,
          run_descrption="Colab with Softmax + CrossEntropyLoss, lr=3e-6, validate per 250, batch 8, 5 epochs")


In [23]:
torch.cuda.empty_cache()
main()

Insecure code counts: 3729, Total code counts: 8000, Proportion 0.466125


Some weights of the model checkpoint at codebert-base were not used when initializing RobertaForSequenceClassification: ['pooler.dense.weight', 'pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at codebert-base and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for p

Epoch:0 Step:0 Training_loss:0.633118, Acc_avg:62.50% Training_loss_avg:0.633118
Validating:


52it [00:07,  6.59it/s]


Epoch:0 Step:0 Val_loss:0.673894, Val_Acc_avg:56.94%
Epoch:0 Step:8 Training_loss:0.675437, Acc_avg:68.75% Training_loss_avg:0.654277
Epoch:0 Step:16 Training_loss:0.779569, Acc_avg:58.33% Training_loss_avg:0.696041
Epoch:0 Step:24 Training_loss:0.750523, Acc_avg:53.12% Training_loss_avg:0.709662
Epoch:0 Step:32 Training_loss:0.651663, Acc_avg:57.50% Training_loss_avg:0.698062
Epoch:0 Step:40 Training_loss:0.725335, Acc_avg:54.17% Training_loss_avg:0.702607
Epoch:0 Step:48 Training_loss:0.818100, Acc_avg:50.00% Training_loss_avg:0.719106
Epoch:0 Step:56 Training_loss:0.605680, Acc_avg:53.12% Training_loss_avg:0.704928
Epoch:0 Step:64 Training_loss:0.646780, Acc_avg:55.56% Training_loss_avg:0.698467
Epoch:0 Step:72 Training_loss:0.807646, Acc_avg:52.50% Training_loss_avg:0.709385
Epoch:0 Step:80 Training_loss:0.726775, Acc_avg:51.14% Training_loss_avg:0.710966
Epoch:0 Step:88 Training_loss:0.629971, Acc_avg:53.12% Training_loss_avg:0.704216
Epoch:0 Step:96 Training_loss:0.709214, Acc_av

52it [00:07,  6.61it/s]


Epoch:0 Step:248 Val_loss:0.673586, Val_Acc_avg:59.25%
Epoch:0 Step:256 Training_loss:0.587489, Acc_avg:57.20% Training_loss_avg:0.685407
Epoch:0 Step:264 Training_loss:0.622195, Acc_avg:57.35% Training_loss_avg:0.683548
Epoch:0 Step:272 Training_loss:0.768407, Acc_avg:56.79% Training_loss_avg:0.685972
Epoch:0 Step:280 Training_loss:0.771040, Acc_avg:56.25% Training_loss_avg:0.688335
Epoch:0 Step:288 Training_loss:0.690655, Acc_avg:56.42% Training_loss_avg:0.688398
Epoch:0 Step:296 Training_loss:0.682747, Acc_avg:56.58% Training_loss_avg:0.688249
Epoch:0 Step:304 Training_loss:0.623555, Acc_avg:57.05% Training_loss_avg:0.686590
Epoch:0 Step:312 Training_loss:0.648722, Acc_avg:57.19% Training_loss_avg:0.685644
Epoch:0 Step:320 Training_loss:0.784786, Acc_avg:56.71% Training_loss_avg:0.688062
Epoch:0 Step:328 Training_loss:0.716739, Acc_avg:55.95% Training_loss_avg:0.688745
Epoch:0 Step:336 Training_loss:0.889229, Acc_avg:54.94% Training_loss_avg:0.693407
Epoch:0 Step:344 Training_loss:0

52it [00:07,  6.60it/s]


Epoch:0 Step:496 Val_loss:0.688325, Val_Acc_avg:54.62%
Epoch:0 Step:504 Training_loss:0.697506, Acc_avg:53.50% Training_loss_avg:0.695889
Epoch:0 Step:512 Training_loss:0.681556, Acc_avg:53.25% Training_loss_avg:0.695302
Epoch:0 Step:520 Training_loss:0.664397, Acc_avg:53.75% Training_loss_avg:0.694138
Epoch:0 Step:528 Training_loss:0.733375, Acc_avg:53.00% Training_loss_avg:0.695931
Epoch:0 Step:536 Training_loss:0.746494, Acc_avg:52.00% Training_loss_avg:0.697211
Epoch:0 Step:544 Training_loss:0.697829, Acc_avg:52.25% Training_loss_avg:0.697005
Epoch:0 Step:552 Training_loss:0.688415, Acc_avg:53.00% Training_loss_avg:0.695807
Epoch:0 Step:560 Training_loss:0.693687, Acc_avg:52.75% Training_loss_avg:0.696247
Epoch:0 Step:568 Training_loss:0.677974, Acc_avg:52.75% Training_loss_avg:0.697120
Epoch:0 Step:576 Training_loss:0.669706, Acc_avg:53.25% Training_loss_avg:0.696827
Epoch:0 Step:584 Training_loss:0.680473, Acc_avg:53.50% Training_loss_avg:0.695781
Epoch:0 Step:592 Training_loss:0

52it [00:07,  6.59it/s]


Epoch:0 Step:744 Val_loss:0.674684, Val_Acc_avg:59.25%
Epoch:0 Step:752 Training_loss:0.790463, Acc_avg:51.25% Training_loss_avg:0.702933
Epoch:0 Step:760 Training_loss:0.681762, Acc_avg:51.50% Training_loss_avg:0.702306
Epoch:0 Step:768 Training_loss:0.584669, Acc_avg:51.50% Training_loss_avg:0.701028
Epoch:0 Step:776 Training_loss:0.740703, Acc_avg:51.75% Training_loss_avg:0.699270
Epoch:0 Step:784 Training_loss:0.635990, Acc_avg:52.25% Training_loss_avg:0.697523
Epoch:0 Step:792 Training_loss:0.617338, Acc_avg:52.75% Training_loss_avg:0.695795
Epoch:0 Step:800 Training_loss:0.613840, Acc_avg:54.00% Training_loss_avg:0.692115
Epoch:0 Step:808 Training_loss:0.673402, Acc_avg:53.75% Training_loss_avg:0.692072
Epoch:0 Step:816 Training_loss:0.680003, Acc_avg:54.50% Training_loss_avg:0.691099
Epoch:0 Step:824 Training_loss:0.653167, Acc_avg:55.00% Training_loss_avg:0.689452
Epoch:0 Step:832 Training_loss:0.637615, Acc_avg:55.50% Training_loss_avg:0.687297
Epoch:0 Step:840 Training_loss:0

52it [00:07,  6.61it/s]


Epoch:0 Step:992 Val_loss:0.678709, Val_Acc_avg:47.69%
Epoch:0 Step:1000 Training_loss:0.706854, Acc_avg:51.50% Training_loss_avg:0.698816
Epoch:0 Step:1008 Training_loss:0.681189, Acc_avg:51.25% Training_loss_avg:0.699400
Epoch:0 Step:1016 Training_loss:0.711231, Acc_avg:51.00% Training_loss_avg:0.700089
Epoch:0 Step:1024 Training_loss:0.702105, Acc_avg:50.50% Training_loss_avg:0.701158
Epoch:0 Step:1032 Training_loss:0.757104, Acc_avg:50.00% Training_loss_avg:0.702223
Epoch:0 Step:1040 Training_loss:0.738499, Acc_avg:50.50% Training_loss_avg:0.702403
Epoch:0 Step:1048 Training_loss:0.749651, Acc_avg:50.50% Training_loss_avg:0.702162
Epoch:0 Step:1056 Training_loss:0.688095, Acc_avg:51.00% Training_loss_avg:0.701482
Epoch:0 Step:1064 Training_loss:0.662426, Acc_avg:50.50% Training_loss_avg:0.702928
Epoch:0 Step:1072 Training_loss:0.709186, Acc_avg:50.00% Training_loss_avg:0.703552
Epoch:0 Step:1080 Training_loss:0.662831, Acc_avg:50.25% Training_loss_avg:0.702070
Epoch:0 Step:1088 Tra

52it [00:07,  6.59it/s]


Epoch:0 Step:1240 Val_loss:0.698467, Val_Acc_avg:52.31%
Epoch:0 Step:1248 Training_loss:0.724404, Acc_avg:47.75% Training_loss_avg:0.709330
Epoch:0 Step:1256 Training_loss:0.692155, Acc_avg:48.25% Training_loss_avg:0.708107
Epoch:0 Step:1264 Training_loss:0.716552, Acc_avg:47.25% Training_loss_avg:0.710103
Epoch:0 Step:1272 Training_loss:0.666370, Acc_avg:47.25% Training_loss_avg:0.709259
Epoch:0 Step:1280 Training_loss:0.673482, Acc_avg:48.50% Training_loss_avg:0.706711
Epoch:0 Step:1288 Training_loss:0.697000, Acc_avg:48.50% Training_loss_avg:0.707086
Epoch:0 Step:1296 Training_loss:0.678200, Acc_avg:49.50% Training_loss_avg:0.705160
Epoch:0 Step:1304 Training_loss:0.709370, Acc_avg:49.00% Training_loss_avg:0.707450
Epoch:0 Step:1312 Training_loss:0.700345, Acc_avg:48.75% Training_loss_avg:0.707184
Epoch:0 Step:1320 Training_loss:0.744075, Acc_avg:48.75% Training_loss_avg:0.706611
Epoch:0 Step:1328 Training_loss:0.713192, Acc_avg:48.50% Training_loss_avg:0.705504
Epoch:0 Step:1336 Tr

52it [00:07,  6.61it/s]


Epoch:0 Step:1488 Val_loss:0.675637, Val_Acc_avg:56.94%
Epoch:0 Step:1496 Training_loss:0.808376, Acc_avg:51.25% Training_loss_avg:0.702667
Epoch:0 Step:1504 Training_loss:0.654480, Acc_avg:51.50% Training_loss_avg:0.702108
Epoch:0 Step:1512 Training_loss:0.671999, Acc_avg:51.50% Training_loss_avg:0.701587
Epoch:0 Step:1520 Training_loss:0.679135, Acc_avg:51.50% Training_loss_avg:0.700877
Epoch:0 Step:1528 Training_loss:0.660422, Acc_avg:51.75% Training_loss_avg:0.699574
Epoch:0 Step:1536 Training_loss:0.649658, Acc_avg:52.00% Training_loss_avg:0.698261
Epoch:0 Step:1544 Training_loss:0.745932, Acc_avg:51.25% Training_loss_avg:0.700237
Epoch:0 Step:1552 Training_loss:0.700784, Acc_avg:51.25% Training_loss_avg:0.700403
Epoch:0 Step:1560 Training_loss:0.635718, Acc_avg:51.75% Training_loss_avg:0.698514
Epoch:0 Step:1568 Training_loss:0.632935, Acc_avg:52.75% Training_loss_avg:0.696634
Epoch:0 Step:1576 Training_loss:0.686631, Acc_avg:53.25% Training_loss_avg:0.696241
Epoch:0 Step:1584 Tr

52it [00:07,  6.60it/s]


Epoch:0 Step:1736 Val_loss:0.674698, Val_Acc_avg:54.62%
Epoch:0 Step:1744 Training_loss:0.647756, Acc_avg:58.50% Training_loss_avg:0.688058
Epoch:0 Step:1752 Training_loss:0.649183, Acc_avg:58.25% Training_loss_avg:0.687835
Epoch:0 Step:1760 Training_loss:0.618557, Acc_avg:58.50% Training_loss_avg:0.686473
Epoch:0 Step:1768 Training_loss:0.636756, Acc_avg:58.50% Training_loss_avg:0.685674
Epoch:0 Step:1776 Training_loss:0.621527, Acc_avg:59.00% Training_loss_avg:0.683282
Epoch:0 Step:1784 Training_loss:0.711278, Acc_avg:58.75% Training_loss_avg:0.684346
Epoch:0 Step:1792 Training_loss:0.640794, Acc_avg:58.50% Training_loss_avg:0.683946
Epoch:0 Step:1800 Training_loss:0.692649, Acc_avg:58.50% Training_loss_avg:0.683914
Epoch:0 Step:1808 Training_loss:0.677521, Acc_avg:58.50% Training_loss_avg:0.683686
Epoch:0 Step:1816 Training_loss:0.746268, Acc_avg:58.00% Training_loss_avg:0.685597
Epoch:0 Step:1824 Training_loss:0.750588, Acc_avg:58.50% Training_loss_avg:0.684986
Epoch:0 Step:1832 Tr

52it [00:07,  6.60it/s]


Epoch:0 Step:1984 Val_loss:0.672317, Val_Acc_avg:59.25%
Epoch:0 Step:1992 Training_loss:0.659366, Acc_avg:60.50% Training_loss_avg:0.676197
Epoch:0 Step:2000 Training_loss:0.565024, Acc_avg:61.25% Training_loss_avg:0.673245
Epoch:0 Step:2008 Training_loss:0.646151, Acc_avg:61.75% Training_loss_avg:0.672213
Epoch:0 Step:2016 Training_loss:0.646275, Acc_avg:61.75% Training_loss_avg:0.671506
Epoch:0 Step:2024 Training_loss:0.784107, Acc_avg:61.25% Training_loss_avg:0.673909
Epoch:0 Step:2032 Training_loss:0.689633, Acc_avg:60.75% Training_loss_avg:0.674730
Epoch:0 Step:2040 Training_loss:0.707167, Acc_avg:60.50% Training_loss_avg:0.675529
Epoch:0 Step:2048 Training_loss:0.860641, Acc_avg:60.25% Training_loss_avg:0.678436
Epoch:0 Step:2056 Training_loss:0.775694, Acc_avg:59.50% Training_loss_avg:0.679309
Epoch:0 Step:2064 Training_loss:0.731487, Acc_avg:58.75% Training_loss_avg:0.681439
Epoch:0 Step:2072 Training_loss:0.699891, Acc_avg:58.75% Training_loss_avg:0.681462
Epoch:0 Step:2080 Tr

52it [00:07,  6.61it/s]


Epoch:0 Step:2232 Val_loss:0.671919, Val_Acc_avg:56.94%
Epoch:0 Step:2240 Training_loss:0.640401, Acc_avg:58.75% Training_loss_avg:0.677915
Epoch:0 Step:2248 Training_loss:0.613197, Acc_avg:59.00% Training_loss_avg:0.676381
Epoch:0 Step:2256 Training_loss:0.669889, Acc_avg:59.50% Training_loss_avg:0.675136
Epoch:0 Step:2264 Training_loss:0.767562, Acc_avg:59.00% Training_loss_avg:0.676563
Epoch:0 Step:2272 Training_loss:0.762321, Acc_avg:58.75% Training_loss_avg:0.677100
Epoch:0 Step:2280 Training_loss:0.613891, Acc_avg:58.75% Training_loss_avg:0.675813
Epoch:0 Step:2288 Training_loss:0.759069, Acc_avg:59.00% Training_loss_avg:0.676727
Epoch:0 Step:2296 Training_loss:0.725534, Acc_avg:58.75% Training_loss_avg:0.678179
Epoch:0 Step:2304 Training_loss:0.587636, Acc_avg:59.50% Training_loss_avg:0.674400
Epoch:0 Step:2312 Training_loss:0.629180, Acc_avg:59.25% Training_loss_avg:0.674040
Epoch:0 Step:2320 Training_loss:0.735448, Acc_avg:59.00% Training_loss_avg:0.675287
Epoch:0 Step:2328 Tr

52it [00:07,  6.61it/s]


Epoch:0 Step:2480 Val_loss:0.683071, Val_Acc_avg:52.31%
Epoch:0 Step:2488 Training_loss:0.651610, Acc_avg:54.25% Training_loss_avg:0.686527
Epoch:0 Step:2496 Training_loss:0.684413, Acc_avg:53.50% Training_loss_avg:0.689539
Epoch:0 Step:2504 Training_loss:0.702765, Acc_avg:53.25% Training_loss_avg:0.689480
Epoch:0 Step:2512 Training_loss:0.645277, Acc_avg:53.50% Training_loss_avg:0.689491
Epoch:0 Step:2520 Training_loss:0.714433, Acc_avg:53.00% Training_loss_avg:0.690051
Epoch:0 Step:2528 Training_loss:0.699144, Acc_avg:52.25% Training_loss_avg:0.692433
Epoch:0 Step:2536 Training_loss:0.682803, Acc_avg:52.00% Training_loss_avg:0.692724
Epoch:0 Step:2544 Training_loss:0.658111, Acc_avg:52.50% Training_loss_avg:0.691376
Epoch:0 Step:2552 Training_loss:0.683500, Acc_avg:53.00% Training_loss_avg:0.690314
Epoch:0 Step:2560 Training_loss:0.778614, Acc_avg:52.00% Training_loss_avg:0.692938
Epoch:0 Step:2568 Training_loss:0.657840, Acc_avg:52.25% Training_loss_avg:0.693501
Epoch:0 Step:2576 Tr

52it [00:07,  6.58it/s]


Epoch:0 Step:2728 Val_loss:0.680177, Val_Acc_avg:59.25%
Epoch:0 Step:2736 Training_loss:0.711233, Acc_avg:51.75% Training_loss_avg:0.694380
Epoch:0 Step:2744 Training_loss:0.734969, Acc_avg:51.25% Training_loss_avg:0.694440
Epoch:0 Step:2752 Training_loss:0.631460, Acc_avg:51.75% Training_loss_avg:0.694072
Epoch:0 Step:2760 Training_loss:0.752168, Acc_avg:51.25% Training_loss_avg:0.695789
Epoch:0 Step:2768 Training_loss:0.709984, Acc_avg:51.25% Training_loss_avg:0.695695
Epoch:0 Step:2776 Training_loss:0.741533, Acc_avg:51.50% Training_loss_avg:0.696313
Epoch:0 Step:2784 Training_loss:0.704575, Acc_avg:51.75% Training_loss_avg:0.694431
Epoch:0 Step:2792 Training_loss:0.696760, Acc_avg:51.50% Training_loss_avg:0.694192
Epoch:0 Step:2800 Training_loss:0.719350, Acc_avg:51.25% Training_loss_avg:0.694604
Epoch:0 Step:2808 Training_loss:0.705699, Acc_avg:51.00% Training_loss_avg:0.695153
Epoch:0 Step:2816 Training_loss:0.752212, Acc_avg:50.50% Training_loss_avg:0.696288
Epoch:0 Step:2824 Tr

52it [00:07,  6.58it/s]


Epoch:0 Step:2976 Val_loss:0.676400, Val_Acc_avg:52.31%
Epoch:0 Step:2984 Training_loss:0.731210, Acc_avg:52.75% Training_loss_avg:0.693802
Epoch:0 Step:2992 Training_loss:0.647902, Acc_avg:53.50% Training_loss_avg:0.692868
Epoch:0 Step:3000 Training_loss:0.694663, Acc_avg:53.50% Training_loss_avg:0.693593
Epoch:0 Step:3008 Training_loss:0.744917, Acc_avg:53.25% Training_loss_avg:0.695055
Epoch:0 Step:3016 Training_loss:0.672413, Acc_avg:53.50% Training_loss_avg:0.695204
Epoch:0 Step:3024 Training_loss:0.756569, Acc_avg:52.75% Training_loss_avg:0.696465
Epoch:0 Step:3032 Training_loss:0.711945, Acc_avg:52.50% Training_loss_avg:0.698509
Epoch:0 Step:3040 Training_loss:0.587892, Acc_avg:53.00% Training_loss_avg:0.696591
Epoch:0 Step:3048 Training_loss:0.636423, Acc_avg:53.50% Training_loss_avg:0.695154
Epoch:0 Step:3056 Training_loss:0.719460, Acc_avg:52.75% Training_loss_avg:0.695857
Epoch:0 Step:3064 Training_loss:0.660326, Acc_avg:53.00% Training_loss_avg:0.694999
Epoch:0 Step:3072 Tr

52it [00:07,  6.58it/s]


Epoch:0 Step:3224 Val_loss:0.671606, Val_Acc_avg:59.25%
Epoch:0 Step:3232 Training_loss:0.856151, Acc_avg:58.25% Training_loss_avg:0.686259
Epoch:0 Step:3240 Training_loss:0.605412, Acc_avg:58.75% Training_loss_avg:0.684805
Epoch:0 Step:3248 Training_loss:0.747885, Acc_avg:58.75% Training_loss_avg:0.685526
Epoch:0 Step:3256 Training_loss:0.674860, Acc_avg:58.00% Training_loss_avg:0.686277
Epoch:0 Step:3264 Training_loss:0.813738, Acc_avg:57.50% Training_loss_avg:0.689493
Epoch:0 Step:3272 Training_loss:0.780767, Acc_avg:56.50% Training_loss_avg:0.692436
Epoch:0 Step:3280 Training_loss:0.577368, Acc_avg:57.25% Training_loss_avg:0.688155
Epoch:0 Step:3288 Training_loss:0.711103, Acc_avg:56.75% Training_loss_avg:0.688800
Epoch:0 Step:3296 Training_loss:0.649442, Acc_avg:57.25% Training_loss_avg:0.687442
Epoch:0 Step:3304 Training_loss:0.897642, Acc_avg:56.50% Training_loss_avg:0.692693
Epoch:0 Step:3312 Training_loss:0.656272, Acc_avg:55.75% Training_loss_avg:0.692533
Epoch:0 Step:3320 Tr

52it [00:07,  6.58it/s]


Epoch:1 Step:0 Val_loss:0.670919, Val_Acc_avg:56.94%
Epoch:1 Step:8 Training_loss:0.661424, Acc_avg:56.33% Training_loss_avg:0.688440
Epoch:1 Step:16 Training_loss:0.627426, Acc_avg:57.08% Training_loss_avg:0.686525
Epoch:1 Step:24 Training_loss:0.651456, Acc_avg:57.08% Training_loss_avg:0.685536
Epoch:1 Step:32 Training_loss:0.686233, Acc_avg:57.33% Training_loss_avg:0.685505
Epoch:1 Step:40 Training_loss:0.636993, Acc_avg:57.33% Training_loss_avg:0.685103
Epoch:1 Step:48 Training_loss:0.756219, Acc_avg:57.58% Training_loss_avg:0.685603
Epoch:1 Step:56 Training_loss:0.704770, Acc_avg:57.08% Training_loss_avg:0.686741
Epoch:1 Step:64 Training_loss:0.645624, Acc_avg:56.83% Training_loss_avg:0.685760
Epoch:1 Step:72 Training_loss:0.689470, Acc_avg:56.83% Training_loss_avg:0.684651
Epoch:1 Step:80 Training_loss:0.656817, Acc_avg:56.83% Training_loss_avg:0.684339
Epoch:1 Step:88 Training_loss:0.850107, Acc_avg:56.33% Training_loss_avg:0.686210
Epoch:1 Step:96 Training_loss:0.714254, Acc_av

52it [00:07,  6.61it/s]


Epoch:1 Step:248 Val_loss:0.676031, Val_Acc_avg:56.94%
Epoch:1 Step:256 Training_loss:0.669425, Acc_avg:54.58% Training_loss_avg:0.690428
Epoch:1 Step:264 Training_loss:0.700634, Acc_avg:54.08% Training_loss_avg:0.690042
Epoch:1 Step:272 Training_loss:0.640767, Acc_avg:53.58% Training_loss_avg:0.690594
Epoch:1 Step:280 Training_loss:0.639362, Acc_avg:54.33% Training_loss_avg:0.688192
Epoch:1 Step:288 Training_loss:0.680163, Acc_avg:53.83% Training_loss_avg:0.691146
Epoch:1 Step:296 Training_loss:0.751259, Acc_avg:53.58% Training_loss_avg:0.689048
Epoch:1 Step:304 Training_loss:0.649287, Acc_avg:53.33% Training_loss_avg:0.689925
Epoch:1 Step:312 Training_loss:0.649945, Acc_avg:53.58% Training_loss_avg:0.687967
Epoch:1 Step:320 Training_loss:0.633382, Acc_avg:54.33% Training_loss_avg:0.687137
Epoch:1 Step:328 Training_loss:0.701534, Acc_avg:54.83% Training_loss_avg:0.684893
Epoch:1 Step:336 Training_loss:0.678997, Acc_avg:55.08% Training_loss_avg:0.682858
Epoch:1 Step:344 Training_loss:0

52it [00:07,  6.60it/s]


Epoch:1 Step:496 Val_loss:0.670810, Val_Acc_avg:59.25%
Epoch:1 Step:504 Training_loss:0.614219, Acc_avg:57.00% Training_loss_avg:0.680752
Epoch:1 Step:512 Training_loss:0.647177, Acc_avg:57.25% Training_loss_avg:0.679747
Epoch:1 Step:520 Training_loss:0.629281, Acc_avg:57.75% Training_loss_avg:0.677603
Epoch:1 Step:528 Training_loss:0.657759, Acc_avg:58.00% Training_loss_avg:0.675933
Epoch:1 Step:536 Training_loss:0.734813, Acc_avg:58.00% Training_loss_avg:0.677067
Epoch:1 Step:544 Training_loss:0.754751, Acc_avg:57.25% Training_loss_avg:0.679140
Epoch:1 Step:552 Training_loss:0.696458, Acc_avg:57.00% Training_loss_avg:0.680675
Epoch:1 Step:560 Training_loss:0.701172, Acc_avg:57.00% Training_loss_avg:0.680689
Epoch:1 Step:568 Training_loss:0.710691, Acc_avg:56.75% Training_loss_avg:0.682676
Epoch:1 Step:576 Training_loss:0.766428, Acc_avg:56.50% Training_loss_avg:0.683945
Epoch:1 Step:584 Training_loss:0.742927, Acc_avg:56.50% Training_loss_avg:0.684655
Epoch:1 Step:592 Training_loss:0

52it [00:07,  6.58it/s]


Epoch:1 Step:744 Val_loss:0.668435, Val_Acc_avg:59.25%
Epoch:1 Step:752 Training_loss:0.816863, Acc_avg:57.75% Training_loss_avg:0.681771
Epoch:1 Step:760 Training_loss:0.815568, Acc_avg:57.00% Training_loss_avg:0.685779
Epoch:1 Step:768 Training_loss:0.661069, Acc_avg:57.00% Training_loss_avg:0.686601
Epoch:1 Step:776 Training_loss:0.840615, Acc_avg:56.25% Training_loss_avg:0.689017
Epoch:1 Step:784 Training_loss:0.667347, Acc_avg:56.50% Training_loss_avg:0.687828
Epoch:1 Step:792 Training_loss:0.624322, Acc_avg:56.75% Training_loss_avg:0.686063
Epoch:1 Step:800 Training_loss:0.763454, Acc_avg:56.50% Training_loss_avg:0.687662
Epoch:1 Step:808 Training_loss:0.578800, Acc_avg:56.75% Training_loss_avg:0.685555
Epoch:1 Step:816 Training_loss:0.717817, Acc_avg:56.75% Training_loss_avg:0.685460
Epoch:1 Step:824 Training_loss:0.610261, Acc_avg:57.25% Training_loss_avg:0.683834
Epoch:1 Step:832 Training_loss:0.614833, Acc_avg:57.00% Training_loss_avg:0.683964
Epoch:1 Step:840 Training_loss:0

52it [00:07,  6.60it/s]


Epoch:1 Step:992 Val_loss:0.671660, Val_Acc_avg:59.25%
Epoch:1 Step:1000 Training_loss:0.744951, Acc_avg:57.50% Training_loss_avg:0.679688
Epoch:1 Step:1008 Training_loss:0.643125, Acc_avg:57.50% Training_loss_avg:0.680172
Epoch:1 Step:1016 Training_loss:0.616302, Acc_avg:57.50% Training_loss_avg:0.680099
Epoch:1 Step:1024 Training_loss:0.680218, Acc_avg:57.00% Training_loss_avg:0.682159
Epoch:1 Step:1032 Training_loss:0.741711, Acc_avg:57.25% Training_loss_avg:0.683231
Epoch:1 Step:1040 Training_loss:0.678410, Acc_avg:56.75% Training_loss_avg:0.684965
Epoch:1 Step:1048 Training_loss:0.681914, Acc_avg:56.50% Training_loss_avg:0.686019
Epoch:1 Step:1056 Training_loss:0.666457, Acc_avg:56.75% Training_loss_avg:0.685613
Epoch:1 Step:1064 Training_loss:0.772234, Acc_avg:56.25% Training_loss_avg:0.687168
Epoch:1 Step:1072 Training_loss:0.692876, Acc_avg:56.25% Training_loss_avg:0.686350
Epoch:1 Step:1080 Training_loss:0.749201, Acc_avg:55.25% Training_loss_avg:0.689375
Epoch:1 Step:1088 Tra

52it [00:07,  6.61it/s]


Epoch:1 Step:1240 Val_loss:0.680718, Val_Acc_avg:54.62%
Epoch:1 Step:1248 Training_loss:0.702692, Acc_avg:57.00% Training_loss_avg:0.687111
Epoch:1 Step:1256 Training_loss:0.690495, Acc_avg:57.00% Training_loss_avg:0.687537
Epoch:1 Step:1264 Training_loss:0.681594, Acc_avg:57.25% Training_loss_avg:0.687621
Epoch:1 Step:1272 Training_loss:0.698280, Acc_avg:57.50% Training_loss_avg:0.686669
Epoch:1 Step:1280 Training_loss:0.713917, Acc_avg:57.00% Training_loss_avg:0.688012
Epoch:1 Step:1288 Training_loss:0.683957, Acc_avg:57.00% Training_loss_avg:0.688553
Epoch:1 Step:1296 Training_loss:0.637040, Acc_avg:58.00% Training_loss_avg:0.687127
Epoch:1 Step:1304 Training_loss:0.671467, Acc_avg:58.00% Training_loss_avg:0.686995
Epoch:1 Step:1312 Training_loss:0.722040, Acc_avg:57.25% Training_loss_avg:0.688983
Epoch:1 Step:1320 Training_loss:0.642436, Acc_avg:57.50% Training_loss_avg:0.689337
Epoch:1 Step:1328 Training_loss:0.745811, Acc_avg:56.75% Training_loss_avg:0.691652
Epoch:1 Step:1336 Tr

52it [00:07,  6.60it/s]


Epoch:1 Step:1488 Val_loss:0.669493, Val_Acc_avg:59.25%
Epoch:1 Step:1496 Training_loss:0.748872, Acc_avg:53.75% Training_loss_avg:0.696805
Epoch:1 Step:1504 Training_loss:0.739027, Acc_avg:53.25% Training_loss_avg:0.698417
Epoch:1 Step:1512 Training_loss:0.700600, Acc_avg:53.50% Training_loss_avg:0.698503
Epoch:1 Step:1520 Training_loss:0.771901, Acc_avg:53.75% Training_loss_avg:0.697949
Epoch:1 Step:1528 Training_loss:0.770857, Acc_avg:54.00% Training_loss_avg:0.699040
Epoch:1 Step:1536 Training_loss:0.635056, Acc_avg:54.25% Training_loss_avg:0.696557
Epoch:1 Step:1544 Training_loss:0.620588, Acc_avg:54.50% Training_loss_avg:0.696096
Epoch:1 Step:1552 Training_loss:0.729290, Acc_avg:53.25% Training_loss_avg:0.697939
Epoch:1 Step:1560 Training_loss:0.677207, Acc_avg:52.75% Training_loss_avg:0.698695
Epoch:1 Step:1568 Training_loss:0.732035, Acc_avg:52.25% Training_loss_avg:0.699517
Epoch:1 Step:1576 Training_loss:0.781943, Acc_avg:51.75% Training_loss_avg:0.700778
Epoch:1 Step:1584 Tr

52it [00:07,  6.60it/s]


Epoch:1 Step:1736 Val_loss:0.671589, Val_Acc_avg:54.62%
Epoch:1 Step:1744 Training_loss:0.665122, Acc_avg:51.00% Training_loss_avg:0.697287
Epoch:1 Step:1752 Training_loss:0.716681, Acc_avg:50.50% Training_loss_avg:0.699197
Epoch:1 Step:1760 Training_loss:0.726065, Acc_avg:50.50% Training_loss_avg:0.698602
Epoch:1 Step:1768 Training_loss:0.748337, Acc_avg:50.50% Training_loss_avg:0.697702
Epoch:1 Step:1776 Training_loss:0.619979, Acc_avg:51.25% Training_loss_avg:0.695518
Epoch:1 Step:1784 Training_loss:0.581799, Acc_avg:52.00% Training_loss_avg:0.692584
Epoch:1 Step:1792 Training_loss:0.643018, Acc_avg:52.75% Training_loss_avg:0.690508
Epoch:1 Step:1800 Training_loss:0.681819, Acc_avg:52.75% Training_loss_avg:0.690516
Epoch:1 Step:1808 Training_loss:0.699247, Acc_avg:52.25% Training_loss_avg:0.691244
Epoch:1 Step:1816 Training_loss:0.638267, Acc_avg:52.50% Training_loss_avg:0.690897
Epoch:1 Step:1824 Training_loss:0.686882, Acc_avg:52.25% Training_loss_avg:0.691572
Epoch:1 Step:1832 Tr

52it [00:07,  6.60it/s]


Epoch:1 Step:1984 Val_loss:0.668097, Val_Acc_avg:59.25%
Epoch:1 Step:1992 Training_loss:0.570581, Acc_avg:56.50% Training_loss_avg:0.678580
Epoch:1 Step:2000 Training_loss:0.806364, Acc_avg:56.25% Training_loss_avg:0.680611
Epoch:1 Step:2008 Training_loss:0.667264, Acc_avg:56.75% Training_loss_avg:0.679474
Epoch:1 Step:2016 Training_loss:0.717384, Acc_avg:57.00% Training_loss_avg:0.679541
Epoch:1 Step:2024 Training_loss:0.697748, Acc_avg:56.50% Training_loss_avg:0.680903
Epoch:1 Step:2032 Training_loss:0.675738, Acc_avg:56.75% Training_loss_avg:0.679839
Epoch:1 Step:2040 Training_loss:0.761827, Acc_avg:56.75% Training_loss_avg:0.679826
Epoch:1 Step:2048 Training_loss:0.565409, Acc_avg:57.25% Training_loss_avg:0.678176
Epoch:1 Step:2056 Training_loss:0.634908, Acc_avg:57.75% Training_loss_avg:0.676321
Epoch:1 Step:2064 Training_loss:0.606366, Acc_avg:57.75% Training_loss_avg:0.676289
Epoch:1 Step:2072 Training_loss:0.709603, Acc_avg:57.75% Training_loss_avg:0.676345
Epoch:1 Step:2080 Tr

52it [00:07,  6.60it/s]


Epoch:1 Step:2232 Val_loss:0.668052, Val_Acc_avg:56.94%
Epoch:1 Step:2240 Training_loss:0.783290, Acc_avg:56.00% Training_loss_avg:0.686477
Epoch:1 Step:2248 Training_loss:0.715757, Acc_avg:55.50% Training_loss_avg:0.687500
Epoch:1 Step:2256 Training_loss:0.667907, Acc_avg:55.25% Training_loss_avg:0.688151
Epoch:1 Step:2264 Training_loss:0.715338, Acc_avg:54.75% Training_loss_avg:0.688741
Epoch:1 Step:2272 Training_loss:0.673826, Acc_avg:55.00% Training_loss_avg:0.687055
Epoch:1 Step:2280 Training_loss:0.707206, Acc_avg:54.50% Training_loss_avg:0.687907
Epoch:1 Step:2288 Training_loss:0.661220, Acc_avg:54.00% Training_loss_avg:0.689939
Epoch:1 Step:2296 Training_loss:0.700498, Acc_avg:52.75% Training_loss_avg:0.692294
Epoch:1 Step:2304 Training_loss:0.714486, Acc_avg:52.75% Training_loss_avg:0.692208
Epoch:1 Step:2312 Training_loss:0.632884, Acc_avg:53.50% Training_loss_avg:0.689994
Epoch:1 Step:2320 Training_loss:0.563699, Acc_avg:53.75% Training_loss_avg:0.688194
Epoch:1 Step:2328 Tr

52it [00:07,  6.60it/s]


Epoch:1 Step:2480 Val_loss:0.666068, Val_Acc_avg:56.94%
Epoch:1 Step:2488 Training_loss:0.786043, Acc_avg:55.75% Training_loss_avg:0.684167
Epoch:1 Step:2496 Training_loss:0.714486, Acc_avg:55.75% Training_loss_avg:0.683990
Epoch:1 Step:2504 Training_loss:0.720207, Acc_avg:55.50% Training_loss_avg:0.684812
Epoch:1 Step:2512 Training_loss:0.682686, Acc_avg:54.75% Training_loss_avg:0.685730
Epoch:1 Step:2520 Training_loss:0.679548, Acc_avg:54.75% Training_loss_avg:0.686937
Epoch:1 Step:2528 Training_loss:0.701636, Acc_avg:54.00% Training_loss_avg:0.688259
Epoch:1 Step:2536 Training_loss:0.683608, Acc_avg:53.50% Training_loss_avg:0.689058
Epoch:1 Step:2544 Training_loss:0.668778, Acc_avg:53.50% Training_loss_avg:0.688927
Epoch:1 Step:2552 Training_loss:0.720154, Acc_avg:53.00% Training_loss_avg:0.690239
Epoch:1 Step:2560 Training_loss:0.656003, Acc_avg:53.50% Training_loss_avg:0.687476
Epoch:1 Step:2568 Training_loss:0.727767, Acc_avg:53.75% Training_loss_avg:0.687468
Epoch:1 Step:2576 Tr

52it [00:07,  6.60it/s]


Epoch:1 Step:2728 Val_loss:0.666775, Val_Acc_avg:52.31%
Epoch:1 Step:2736 Training_loss:0.609469, Acc_avg:57.75% Training_loss_avg:0.675519
Epoch:1 Step:2744 Training_loss:0.736927, Acc_avg:57.00% Training_loss_avg:0.677771
Epoch:1 Step:2752 Training_loss:0.643072, Acc_avg:57.50% Training_loss_avg:0.677896
Epoch:1 Step:2760 Training_loss:0.584894, Acc_avg:57.75% Training_loss_avg:0.676312
Epoch:1 Step:2768 Training_loss:0.615773, Acc_avg:58.25% Training_loss_avg:0.673913
Epoch:1 Step:2776 Training_loss:0.776983, Acc_avg:57.50% Training_loss_avg:0.675640
Epoch:1 Step:2784 Training_loss:0.741281, Acc_avg:56.25% Training_loss_avg:0.678173
Epoch:1 Step:2792 Training_loss:0.665354, Acc_avg:56.25% Training_loss_avg:0.678716
Epoch:1 Step:2800 Training_loss:0.648642, Acc_avg:56.00% Training_loss_avg:0.677310
Epoch:1 Step:2808 Training_loss:0.718407, Acc_avg:56.25% Training_loss_avg:0.677255
Epoch:1 Step:2816 Training_loss:0.750537, Acc_avg:55.75% Training_loss_avg:0.678189
Epoch:1 Step:2824 Tr

52it [00:07,  6.60it/s]


Epoch:1 Step:2976 Val_loss:0.678481, Val_Acc_avg:47.69%
Epoch:1 Step:2984 Training_loss:0.751861, Acc_avg:53.75% Training_loss_avg:0.682187
Epoch:1 Step:2992 Training_loss:0.680051, Acc_avg:53.75% Training_loss_avg:0.682638
Epoch:1 Step:3000 Training_loss:0.725946, Acc_avg:53.75% Training_loss_avg:0.682549
Epoch:1 Step:3008 Training_loss:0.696642, Acc_avg:53.50% Training_loss_avg:0.683513
Epoch:1 Step:3016 Training_loss:0.728992, Acc_avg:52.25% Training_loss_avg:0.685493
Epoch:1 Step:3024 Training_loss:0.644576, Acc_avg:53.00% Training_loss_avg:0.683950
Epoch:1 Step:3032 Training_loss:0.674201, Acc_avg:52.25% Training_loss_avg:0.686853
Epoch:1 Step:3040 Training_loss:0.693727, Acc_avg:52.00% Training_loss_avg:0.686731
Epoch:1 Step:3048 Training_loss:0.646652, Acc_avg:51.50% Training_loss_avg:0.687469
Epoch:1 Step:3056 Training_loss:0.635489, Acc_avg:51.50% Training_loss_avg:0.687716
Epoch:1 Step:3064 Training_loss:0.741939, Acc_avg:50.75% Training_loss_avg:0.689975
Epoch:1 Step:3072 Tr

52it [00:07,  6.61it/s]


Epoch:1 Step:3224 Val_loss:0.659850, Val_Acc_avg:56.94%
Epoch:1 Step:3232 Training_loss:0.702269, Acc_avg:54.00% Training_loss_avg:0.680888
Epoch:1 Step:3240 Training_loss:0.515228, Acc_avg:54.75% Training_loss_avg:0.677551
Epoch:1 Step:3248 Training_loss:0.681992, Acc_avg:55.25% Training_loss_avg:0.676772
Epoch:1 Step:3256 Training_loss:0.664608, Acc_avg:55.25% Training_loss_avg:0.677247
Epoch:1 Step:3264 Training_loss:0.648045, Acc_avg:55.00% Training_loss_avg:0.677322
Epoch:1 Step:3272 Training_loss:0.819985, Acc_avg:54.75% Training_loss_avg:0.679126
Epoch:1 Step:3280 Training_loss:0.589862, Acc_avg:55.25% Training_loss_avg:0.676106
Epoch:1 Step:3288 Training_loss:0.689981, Acc_avg:56.00% Training_loss_avg:0.674999
Epoch:1 Step:3296 Training_loss:0.652897, Acc_avg:56.75% Training_loss_avg:0.674161
Epoch:1 Step:3304 Training_loss:0.683605, Acc_avg:56.75% Training_loss_avg:0.675147
Epoch:1 Step:3312 Training_loss:0.705546, Acc_avg:56.25% Training_loss_avg:0.676991
Epoch:1 Step:3320 Tr

52it [00:07,  6.60it/s]


Epoch:2 Step:0 Val_loss:0.658143, Val_Acc_avg:56.94%
Epoch:2 Step:8 Training_loss:0.767093, Acc_avg:57.33% Training_loss_avg:0.674885
Epoch:2 Step:16 Training_loss:0.549519, Acc_avg:58.08% Training_loss_avg:0.671088
Epoch:2 Step:24 Training_loss:0.659215, Acc_avg:58.33% Training_loss_avg:0.669734
Epoch:2 Step:32 Training_loss:0.780039, Acc_avg:58.08% Training_loss_avg:0.671536
Epoch:2 Step:40 Training_loss:0.657610, Acc_avg:58.58% Training_loss_avg:0.671067
Epoch:2 Step:48 Training_loss:0.716805, Acc_avg:58.83% Training_loss_avg:0.670366
Epoch:2 Step:56 Training_loss:0.672330, Acc_avg:59.08% Training_loss_avg:0.670212
Epoch:2 Step:64 Training_loss:0.536085, Acc_avg:60.08% Training_loss_avg:0.666414
Epoch:2 Step:72 Training_loss:0.624981, Acc_avg:60.33% Training_loss_avg:0.664981
Epoch:2 Step:80 Training_loss:0.658976, Acc_avg:61.33% Training_loss_avg:0.663581
Epoch:2 Step:88 Training_loss:0.795075, Acc_avg:60.83% Training_loss_avg:0.666591
Epoch:2 Step:96 Training_loss:0.679381, Acc_av

52it [00:07,  6.60it/s]


Epoch:2 Step:248 Val_loss:0.657757, Val_Acc_avg:56.94%
Epoch:2 Step:256 Training_loss:0.696087, Acc_avg:58.83% Training_loss_avg:0.680165
Epoch:2 Step:264 Training_loss:0.676044, Acc_avg:59.33% Training_loss_avg:0.678192
Epoch:2 Step:272 Training_loss:0.618165, Acc_avg:59.33% Training_loss_avg:0.677669
Epoch:2 Step:280 Training_loss:0.651034, Acc_avg:59.83% Training_loss_avg:0.676185
Epoch:2 Step:288 Training_loss:0.684526, Acc_avg:59.83% Training_loss_avg:0.676218
Epoch:2 Step:296 Training_loss:0.612447, Acc_avg:60.08% Training_loss_avg:0.674422
Epoch:2 Step:304 Training_loss:0.686516, Acc_avg:59.58% Training_loss_avg:0.677847
Epoch:2 Step:312 Training_loss:0.609894, Acc_avg:59.83% Training_loss_avg:0.676405
Epoch:2 Step:320 Training_loss:0.707152, Acc_avg:59.83% Training_loss_avg:0.677256
Epoch:2 Step:328 Training_loss:0.573140, Acc_avg:60.08% Training_loss_avg:0.675758
Epoch:2 Step:336 Training_loss:0.586810, Acc_avg:60.83% Training_loss_avg:0.671095
Epoch:2 Step:344 Training_loss:0

52it [00:07,  6.60it/s]


Epoch:2 Step:496 Val_loss:0.653619, Val_Acc_avg:59.25%
Epoch:2 Step:504 Training_loss:0.629799, Acc_avg:60.50% Training_loss_avg:0.667536
Epoch:2 Step:512 Training_loss:0.650836, Acc_avg:60.25% Training_loss_avg:0.668011
Epoch:2 Step:520 Training_loss:0.683265, Acc_avg:59.50% Training_loss_avg:0.670828
Epoch:2 Step:528 Training_loss:0.551643, Acc_avg:60.75% Training_loss_avg:0.665703
Epoch:2 Step:536 Training_loss:0.747077, Acc_avg:60.50% Training_loss_avg:0.667113
Epoch:2 Step:544 Training_loss:0.766605, Acc_avg:60.25% Training_loss_avg:0.667428
Epoch:2 Step:552 Training_loss:0.657692, Acc_avg:60.25% Training_loss_avg:0.667101
Epoch:2 Step:560 Training_loss:0.669476, Acc_avg:60.25% Training_loss_avg:0.666882
Epoch:2 Step:568 Training_loss:0.624578, Acc_avg:60.75% Training_loss_avg:0.665511
Epoch:2 Step:576 Training_loss:0.598368, Acc_avg:60.75% Training_loss_avg:0.663965
Epoch:2 Step:584 Training_loss:0.560956, Acc_avg:60.75% Training_loss_avg:0.662453
Epoch:2 Step:592 Training_loss:0

52it [00:07,  6.61it/s]


Epoch:2 Step:744 Val_loss:0.651845, Val_Acc_avg:56.94%
Epoch:2 Step:752 Training_loss:0.618801, Acc_avg:62.25% Training_loss_avg:0.661357
Epoch:2 Step:760 Training_loss:0.693550, Acc_avg:61.75% Training_loss_avg:0.662726
Epoch:2 Step:768 Training_loss:0.814486, Acc_avg:60.75% Training_loss_avg:0.666789
Epoch:2 Step:776 Training_loss:0.709080, Acc_avg:61.00% Training_loss_avg:0.667686
Epoch:2 Step:784 Training_loss:0.700234, Acc_avg:61.00% Training_loss_avg:0.667858
Epoch:2 Step:792 Training_loss:0.591563, Acc_avg:61.75% Training_loss_avg:0.665012
Epoch:2 Step:800 Training_loss:0.657075, Acc_avg:61.50% Training_loss_avg:0.664594
Epoch:2 Step:808 Training_loss:0.747419, Acc_avg:61.00% Training_loss_avg:0.666490
Epoch:2 Step:816 Training_loss:0.714368, Acc_avg:61.00% Training_loss_avg:0.667315
Epoch:2 Step:824 Training_loss:0.794626, Acc_avg:59.75% Training_loss_avg:0.671801
Epoch:2 Step:832 Training_loss:0.660744, Acc_avg:59.75% Training_loss_avg:0.671840
Epoch:2 Step:840 Training_loss:0

52it [00:07,  6.60it/s]


Epoch:2 Step:992 Val_loss:0.656234, Val_Acc_avg:54.62%
Epoch:2 Step:1000 Training_loss:0.660402, Acc_avg:56.25% Training_loss_avg:0.680095
Epoch:2 Step:1008 Training_loss:0.641071, Acc_avg:56.25% Training_loss_avg:0.679401
Epoch:2 Step:1016 Training_loss:0.671039, Acc_avg:56.00% Training_loss_avg:0.681205
Epoch:2 Step:1024 Training_loss:0.664480, Acc_avg:55.50% Training_loss_avg:0.682707
Epoch:2 Step:1032 Training_loss:0.598706, Acc_avg:55.75% Training_loss_avg:0.679244
Epoch:2 Step:1040 Training_loss:0.719405, Acc_avg:55.50% Training_loss_avg:0.679356
Epoch:2 Step:1048 Training_loss:0.722714, Acc_avg:55.00% Training_loss_avg:0.682445
Epoch:2 Step:1056 Training_loss:0.655267, Acc_avg:55.25% Training_loss_avg:0.680227
Epoch:2 Step:1064 Training_loss:0.630477, Acc_avg:55.50% Training_loss_avg:0.681260
Epoch:2 Step:1072 Training_loss:0.771899, Acc_avg:54.50% Training_loss_avg:0.685149
Epoch:2 Step:1080 Training_loss:0.625616, Acc_avg:55.25% Training_loss_avg:0.680325
Epoch:2 Step:1088 Tra

52it [00:07,  6.61it/s]


Epoch:2 Step:1240 Val_loss:0.647588, Val_Acc_avg:56.94%
Epoch:2 Step:1248 Training_loss:0.636024, Acc_avg:56.75% Training_loss_avg:0.668424
Epoch:2 Step:1256 Training_loss:0.582124, Acc_avg:57.00% Training_loss_avg:0.667804
Epoch:2 Step:1264 Training_loss:0.660536, Acc_avg:56.75% Training_loss_avg:0.667176
Epoch:2 Step:1272 Training_loss:0.638924, Acc_avg:57.50% Training_loss_avg:0.663462
Epoch:2 Step:1280 Training_loss:0.678560, Acc_avg:57.75% Training_loss_avg:0.663600
Epoch:2 Step:1288 Training_loss:0.654422, Acc_avg:57.75% Training_loss_avg:0.663523
Epoch:2 Step:1296 Training_loss:0.725678, Acc_avg:57.50% Training_loss_avg:0.665580
Epoch:2 Step:1304 Training_loss:0.651416, Acc_avg:57.75% Training_loss_avg:0.664546
Epoch:2 Step:1312 Training_loss:0.604425, Acc_avg:58.00% Training_loss_avg:0.665432
Epoch:2 Step:1320 Training_loss:0.660567, Acc_avg:58.25% Training_loss_avg:0.662188
Epoch:2 Step:1328 Training_loss:0.649422, Acc_avg:58.00% Training_loss_avg:0.662551
Epoch:2 Step:1336 Tr

52it [00:07,  6.60it/s]


Epoch:2 Step:1488 Val_loss:0.647915, Val_Acc_avg:56.94%
Epoch:2 Step:1496 Training_loss:0.607926, Acc_avg:61.00% Training_loss_avg:0.663150
Epoch:2 Step:1504 Training_loss:0.585070, Acc_avg:60.75% Training_loss_avg:0.663249
Epoch:2 Step:1512 Training_loss:0.651651, Acc_avg:60.75% Training_loss_avg:0.662686
Epoch:2 Step:1520 Training_loss:0.642816, Acc_avg:61.25% Training_loss_avg:0.661201
Epoch:2 Step:1528 Training_loss:0.680858, Acc_avg:61.75% Training_loss_avg:0.658889
Epoch:2 Step:1536 Training_loss:0.761495, Acc_avg:61.50% Training_loss_avg:0.659913
Epoch:2 Step:1544 Training_loss:0.738802, Acc_avg:61.00% Training_loss_avg:0.661707
Epoch:2 Step:1552 Training_loss:0.638598, Acc_avg:61.50% Training_loss_avg:0.661077
Epoch:2 Step:1560 Training_loss:0.584241, Acc_avg:62.00% Training_loss_avg:0.659733
Epoch:2 Step:1568 Training_loss:0.644775, Acc_avg:62.00% Training_loss_avg:0.660025
Epoch:2 Step:1576 Training_loss:0.647787, Acc_avg:62.25% Training_loss_avg:0.659102
Epoch:2 Step:1584 Tr

52it [00:07,  6.60it/s]


Epoch:2 Step:1736 Val_loss:0.649766, Val_Acc_avg:54.62%
Epoch:2 Step:1744 Training_loss:0.696327, Acc_avg:58.25% Training_loss_avg:0.668616
Epoch:2 Step:1752 Training_loss:0.515848, Acc_avg:58.50% Training_loss_avg:0.665151
Epoch:2 Step:1760 Training_loss:0.648222, Acc_avg:59.00% Training_loss_avg:0.664172
Epoch:2 Step:1768 Training_loss:0.774973, Acc_avg:58.50% Training_loss_avg:0.668032
Epoch:2 Step:1776 Training_loss:0.501493, Acc_avg:59.50% Training_loss_avg:0.664312
Epoch:2 Step:1784 Training_loss:0.739492, Acc_avg:59.00% Training_loss_avg:0.664659
Epoch:2 Step:1792 Training_loss:0.604432, Acc_avg:59.75% Training_loss_avg:0.662797
Epoch:2 Step:1800 Training_loss:0.690019, Acc_avg:59.75% Training_loss_avg:0.663386
Epoch:2 Step:1808 Training_loss:0.591497, Acc_avg:60.25% Training_loss_avg:0.660530
Epoch:2 Step:1816 Training_loss:0.681362, Acc_avg:60.50% Training_loss_avg:0.659891
Epoch:2 Step:1824 Training_loss:0.602065, Acc_avg:61.00% Training_loss_avg:0.657346
Epoch:2 Step:1832 Tr

52it [00:07,  6.61it/s]


Epoch:2 Step:1984 Val_loss:0.647739, Val_Acc_avg:52.31%
Epoch:2 Step:1992 Training_loss:0.638079, Acc_avg:61.25% Training_loss_avg:0.660746
Epoch:2 Step:2000 Training_loss:0.704495, Acc_avg:61.50% Training_loss_avg:0.660199
Epoch:2 Step:2008 Training_loss:0.643216, Acc_avg:61.25% Training_loss_avg:0.660834
Epoch:2 Step:2016 Training_loss:0.622959, Acc_avg:61.75% Training_loss_avg:0.659320
Epoch:2 Step:2024 Training_loss:0.671158, Acc_avg:61.75% Training_loss_avg:0.659346
Epoch:2 Step:2032 Training_loss:0.624386, Acc_avg:62.00% Training_loss_avg:0.657501
Epoch:2 Step:2040 Training_loss:0.726889, Acc_avg:61.50% Training_loss_avg:0.659370
Epoch:2 Step:2048 Training_loss:0.704967, Acc_avg:61.50% Training_loss_avg:0.660484
Epoch:2 Step:2056 Training_loss:0.669065, Acc_avg:61.25% Training_loss_avg:0.661562
Epoch:2 Step:2064 Training_loss:0.593184, Acc_avg:62.00% Training_loss_avg:0.659313
Epoch:2 Step:2072 Training_loss:0.616621, Acc_avg:63.00% Training_loss_avg:0.657528
Epoch:2 Step:2080 Tr

52it [00:07,  6.60it/s]


Epoch:2 Step:2232 Val_loss:0.641915, Val_Acc_avg:59.25%
Epoch:2 Step:2240 Training_loss:0.740899, Acc_avg:60.00% Training_loss_avg:0.670892
Epoch:2 Step:2248 Training_loss:0.666516, Acc_avg:59.25% Training_loss_avg:0.673140
Epoch:2 Step:2256 Training_loss:0.587070, Acc_avg:59.75% Training_loss_avg:0.670702
Epoch:2 Step:2264 Training_loss:0.730924, Acc_avg:59.50% Training_loss_avg:0.672736
Epoch:2 Step:2272 Training_loss:0.606378, Acc_avg:59.50% Training_loss_avg:0.672064
Epoch:2 Step:2280 Training_loss:0.595870, Acc_avg:60.75% Training_loss_avg:0.667428
Epoch:2 Step:2288 Training_loss:0.673575, Acc_avg:60.50% Training_loss_avg:0.667866
Epoch:2 Step:2296 Training_loss:0.719001, Acc_avg:60.25% Training_loss_avg:0.669282
Epoch:2 Step:2304 Training_loss:0.536987, Acc_avg:61.50% Training_loss_avg:0.663051
Epoch:2 Step:2312 Training_loss:0.677110, Acc_avg:61.00% Training_loss_avg:0.665226
Epoch:2 Step:2320 Training_loss:0.662980, Acc_avg:61.00% Training_loss_avg:0.664390
Epoch:2 Step:2328 Tr

52it [00:07,  6.60it/s]


Epoch:2 Step:2480 Val_loss:0.642205, Val_Acc_avg:59.25%
Epoch:2 Step:2488 Training_loss:0.684191, Acc_avg:59.25% Training_loss_avg:0.664993
Epoch:2 Step:2496 Training_loss:0.658514, Acc_avg:59.25% Training_loss_avg:0.664967
Epoch:2 Step:2504 Training_loss:0.657977, Acc_avg:59.25% Training_loss_avg:0.666031
Epoch:2 Step:2512 Training_loss:0.652848, Acc_avg:59.00% Training_loss_avg:0.664359
Epoch:2 Step:2520 Training_loss:0.608456, Acc_avg:59.75% Training_loss_avg:0.661952
Epoch:2 Step:2528 Training_loss:0.695815, Acc_avg:60.25% Training_loss_avg:0.661552
Epoch:2 Step:2536 Training_loss:0.717786, Acc_avg:59.75% Training_loss_avg:0.663649
Epoch:2 Step:2544 Training_loss:0.580464, Acc_avg:59.75% Training_loss_avg:0.662432
Epoch:2 Step:2552 Training_loss:0.707539, Acc_avg:59.75% Training_loss_avg:0.662655
Epoch:2 Step:2560 Training_loss:0.667032, Acc_avg:59.75% Training_loss_avg:0.662210
Epoch:2 Step:2568 Training_loss:0.577982, Acc_avg:59.50% Training_loss_avg:0.661714
Epoch:2 Step:2576 Tr

52it [00:07,  6.61it/s]


Epoch:2 Step:2728 Val_loss:0.643095, Val_Acc_avg:54.62%
Epoch:2 Step:2736 Training_loss:0.578472, Acc_avg:58.25% Training_loss_avg:0.656069
Epoch:2 Step:2744 Training_loss:0.773914, Acc_avg:57.25% Training_loss_avg:0.660366
Epoch:2 Step:2752 Training_loss:0.665002, Acc_avg:57.75% Training_loss_avg:0.658349
Epoch:2 Step:2760 Training_loss:0.704891, Acc_avg:57.50% Training_loss_avg:0.659869
Epoch:2 Step:2768 Training_loss:0.654130, Acc_avg:57.25% Training_loss_avg:0.661009
Epoch:2 Step:2776 Training_loss:0.717974, Acc_avg:57.25% Training_loss_avg:0.663746
Epoch:2 Step:2784 Training_loss:0.742046, Acc_avg:56.50% Training_loss_avg:0.666777
Epoch:2 Step:2792 Training_loss:0.639019, Acc_avg:56.25% Training_loss_avg:0.667783
Epoch:2 Step:2800 Training_loss:0.587511, Acc_avg:56.00% Training_loss_avg:0.667754
Epoch:2 Step:2808 Training_loss:0.628016, Acc_avg:56.00% Training_loss_avg:0.666613
Epoch:2 Step:2816 Training_loss:0.569877, Acc_avg:56.50% Training_loss_avg:0.663920
Epoch:2 Step:2824 Tr

52it [00:07,  6.60it/s]


Epoch:2 Step:2976 Val_loss:0.645341, Val_Acc_avg:54.62%
Epoch:2 Step:2984 Training_loss:0.562879, Acc_avg:55.00% Training_loss_avg:0.667354
Epoch:2 Step:2992 Training_loss:0.619741, Acc_avg:55.25% Training_loss_avg:0.665897
Epoch:2 Step:3000 Training_loss:0.739295, Acc_avg:54.75% Training_loss_avg:0.666839
Epoch:2 Step:3008 Training_loss:0.669122, Acc_avg:54.75% Training_loss_avg:0.665251
Epoch:2 Step:3016 Training_loss:0.769488, Acc_avg:54.50% Training_loss_avg:0.667428
Epoch:2 Step:3024 Training_loss:0.680907, Acc_avg:54.25% Training_loss_avg:0.669041
Epoch:2 Step:3032 Training_loss:0.649886, Acc_avg:53.50% Training_loss_avg:0.669620
Epoch:2 Step:3040 Training_loss:0.582341, Acc_avg:53.50% Training_loss_avg:0.667033
Epoch:2 Step:3048 Training_loss:0.643936, Acc_avg:54.25% Training_loss_avg:0.666423
Epoch:2 Step:3056 Training_loss:0.701357, Acc_avg:54.50% Training_loss_avg:0.665378
Epoch:2 Step:3064 Training_loss:0.725670, Acc_avg:53.50% Training_loss_avg:0.668788
Epoch:2 Step:3072 Tr

52it [00:07,  6.60it/s]


Epoch:2 Step:3224 Val_loss:0.642581, Val_Acc_avg:56.94%
Epoch:2 Step:3232 Training_loss:0.694720, Acc_avg:54.25% Training_loss_avg:0.666221
Epoch:2 Step:3240 Training_loss:0.579798, Acc_avg:54.50% Training_loss_avg:0.665974
Epoch:2 Step:3248 Training_loss:0.627791, Acc_avg:55.00% Training_loss_avg:0.663313
Epoch:2 Step:3256 Training_loss:0.702016, Acc_avg:54.75% Training_loss_avg:0.664723
Epoch:2 Step:3264 Training_loss:0.606247, Acc_avg:55.50% Training_loss_avg:0.663579
Epoch:2 Step:3272 Training_loss:0.766077, Acc_avg:54.75% Training_loss_avg:0.667838
Epoch:2 Step:3280 Training_loss:0.699139, Acc_avg:54.75% Training_loss_avg:0.668434
Epoch:2 Step:3288 Training_loss:0.646724, Acc_avg:55.00% Training_loss_avg:0.666812
Epoch:2 Step:3296 Training_loss:0.753264, Acc_avg:54.75% Training_loss_avg:0.669518
Epoch:2 Step:3304 Training_loss:0.702722, Acc_avg:55.00% Training_loss_avg:0.667663
Epoch:2 Step:3312 Training_loss:0.661067, Acc_avg:55.25% Training_loss_avg:0.667141
Epoch:2 Step:3320 Tr

52it [00:07,  6.60it/s]


Epoch:3 Step:0 Val_loss:0.640425, Val_Acc_avg:59.25%
Epoch:3 Step:8 Training_loss:0.686969, Acc_avg:57.00% Training_loss_avg:0.660232
Epoch:3 Step:16 Training_loss:0.708795, Acc_avg:57.25% Training_loss_avg:0.659095
Epoch:3 Step:24 Training_loss:0.649909, Acc_avg:57.50% Training_loss_avg:0.660864
Epoch:3 Step:32 Training_loss:0.661691, Acc_avg:58.00% Training_loss_avg:0.660058
Epoch:3 Step:40 Training_loss:0.575202, Acc_avg:58.25% Training_loss_avg:0.658568
Epoch:3 Step:48 Training_loss:0.704881, Acc_avg:57.75% Training_loss_avg:0.661408
Epoch:3 Step:56 Training_loss:0.580951, Acc_avg:58.50% Training_loss_avg:0.660633
Epoch:3 Step:64 Training_loss:0.614009, Acc_avg:59.75% Training_loss_avg:0.658127
Epoch:3 Step:72 Training_loss:0.622639, Acc_avg:60.25% Training_loss_avg:0.657197
Epoch:3 Step:80 Training_loss:0.614611, Acc_avg:60.25% Training_loss_avg:0.654100
Epoch:3 Step:88 Training_loss:0.680525, Acc_avg:60.25% Training_loss_avg:0.654092
Epoch:3 Step:96 Training_loss:0.655173, Acc_av

52it [00:07,  6.60it/s]


Epoch:3 Step:248 Val_loss:0.634845, Val_Acc_avg:56.94%
Epoch:3 Step:256 Training_loss:0.547242, Acc_avg:61.75% Training_loss_avg:0.652160
Epoch:3 Step:264 Training_loss:0.641613, Acc_avg:62.00% Training_loss_avg:0.649683
Epoch:3 Step:272 Training_loss:0.652145, Acc_avg:61.50% Training_loss_avg:0.650312
Epoch:3 Step:280 Training_loss:0.657168, Acc_avg:62.25% Training_loss_avg:0.648812
Epoch:3 Step:288 Training_loss:0.714843, Acc_avg:61.75% Training_loss_avg:0.651688
Epoch:3 Step:296 Training_loss:0.563774, Acc_avg:62.25% Training_loss_avg:0.649069
Epoch:3 Step:304 Training_loss:0.641441, Acc_avg:61.75% Training_loss_avg:0.650302
Epoch:3 Step:312 Training_loss:0.682364, Acc_avg:61.50% Training_loss_avg:0.651393
Epoch:3 Step:320 Training_loss:0.691770, Acc_avg:61.50% Training_loss_avg:0.651188
Epoch:3 Step:328 Training_loss:0.736586, Acc_avg:60.50% Training_loss_avg:0.653795
Epoch:3 Step:336 Training_loss:0.625340, Acc_avg:60.50% Training_loss_avg:0.650980
Epoch:3 Step:344 Training_loss:0

52it [00:07,  6.60it/s]


Epoch:3 Step:496 Val_loss:0.629827, Val_Acc_avg:59.25%
Epoch:3 Step:504 Training_loss:0.621418, Acc_avg:60.25% Training_loss_avg:0.648696
Epoch:3 Step:512 Training_loss:0.580070, Acc_avg:60.75% Training_loss_avg:0.647221
Epoch:3 Step:520 Training_loss:0.686644, Acc_avg:60.75% Training_loss_avg:0.649168
Epoch:3 Step:528 Training_loss:0.734943, Acc_avg:60.25% Training_loss_avg:0.651699
Epoch:3 Step:536 Training_loss:0.808533, Acc_avg:59.75% Training_loss_avg:0.656306
Epoch:3 Step:544 Training_loss:0.631177, Acc_avg:60.00% Training_loss_avg:0.654654
Epoch:3 Step:552 Training_loss:0.630891, Acc_avg:59.50% Training_loss_avg:0.657825
Epoch:3 Step:560 Training_loss:0.581317, Acc_avg:59.75% Training_loss_avg:0.658068
Epoch:3 Step:568 Training_loss:0.580436, Acc_avg:59.50% Training_loss_avg:0.656658
Epoch:3 Step:576 Training_loss:0.683758, Acc_avg:59.25% Training_loss_avg:0.656719
Epoch:3 Step:584 Training_loss:0.706134, Acc_avg:58.75% Training_loss_avg:0.657782
Epoch:3 Step:592 Training_loss:0

52it [00:07,  6.61it/s]


Epoch:3 Step:744 Val_loss:0.642900, Val_Acc_avg:52.31%
Epoch:3 Step:752 Training_loss:0.698332, Acc_avg:58.50% Training_loss_avg:0.657103
Epoch:3 Step:760 Training_loss:0.671482, Acc_avg:58.75% Training_loss_avg:0.655004
Epoch:3 Step:768 Training_loss:0.732259, Acc_avg:58.75% Training_loss_avg:0.657290
Epoch:3 Step:776 Training_loss:0.639038, Acc_avg:58.50% Training_loss_avg:0.657840
Epoch:3 Step:784 Training_loss:0.626315, Acc_avg:58.00% Training_loss_avg:0.659964
Epoch:3 Step:792 Training_loss:0.623509, Acc_avg:58.50% Training_loss_avg:0.658028
Epoch:3 Step:800 Training_loss:0.695326, Acc_avg:59.00% Training_loss_avg:0.656873
Epoch:3 Step:808 Training_loss:0.820656, Acc_avg:58.25% Training_loss_avg:0.661639
Epoch:3 Step:816 Training_loss:0.524157, Acc_avg:58.75% Training_loss_avg:0.659568
Epoch:3 Step:824 Training_loss:0.820100, Acc_avg:58.50% Training_loss_avg:0.662184
Epoch:3 Step:832 Training_loss:0.642336, Acc_avg:58.25% Training_loss_avg:0.665000
Epoch:3 Step:840 Training_loss:0

52it [00:07,  6.60it/s]


Epoch:3 Step:992 Val_loss:0.628473, Val_Acc_avg:56.94%
Epoch:3 Step:1000 Training_loss:0.717573, Acc_avg:58.75% Training_loss_avg:0.664585
Epoch:3 Step:1008 Training_loss:0.641377, Acc_avg:59.00% Training_loss_avg:0.663846
Epoch:3 Step:1016 Training_loss:0.548099, Acc_avg:59.75% Training_loss_avg:0.660270
Epoch:3 Step:1024 Training_loss:0.631436, Acc_avg:60.75% Training_loss_avg:0.656370
Epoch:3 Step:1032 Training_loss:0.653562, Acc_avg:61.00% Training_loss_avg:0.657138
Epoch:3 Step:1040 Training_loss:0.481327, Acc_avg:61.25% Training_loss_avg:0.654574
Epoch:3 Step:1048 Training_loss:0.708482, Acc_avg:60.75% Training_loss_avg:0.656690
Epoch:3 Step:1056 Training_loss:0.651299, Acc_avg:61.00% Training_loss_avg:0.655364
Epoch:3 Step:1064 Training_loss:0.676478, Acc_avg:60.75% Training_loss_avg:0.656958
Epoch:3 Step:1072 Training_loss:0.724544, Acc_avg:60.50% Training_loss_avg:0.658057
Epoch:3 Step:1080 Training_loss:0.645002, Acc_avg:60.00% Training_loss_avg:0.658665
Epoch:3 Step:1088 Tra

52it [00:07,  6.61it/s]


Epoch:3 Step:1240 Val_loss:0.624021, Val_Acc_avg:54.62%
Epoch:3 Step:1248 Training_loss:0.519196, Acc_avg:61.75% Training_loss_avg:0.644948
Epoch:3 Step:1256 Training_loss:0.741873, Acc_avg:62.00% Training_loss_avg:0.643326
Epoch:3 Step:1264 Training_loss:0.629164, Acc_avg:61.75% Training_loss_avg:0.644299
Epoch:3 Step:1272 Training_loss:0.679866, Acc_avg:61.25% Training_loss_avg:0.645834
Epoch:3 Step:1280 Training_loss:0.665734, Acc_avg:61.25% Training_loss_avg:0.645735
Epoch:3 Step:1288 Training_loss:0.642301, Acc_avg:62.50% Training_loss_avg:0.640153
Epoch:3 Step:1296 Training_loss:0.702760, Acc_avg:61.75% Training_loss_avg:0.644633
Epoch:3 Step:1304 Training_loss:0.781710, Acc_avg:61.50% Training_loss_avg:0.646405
Epoch:3 Step:1312 Training_loss:0.669344, Acc_avg:60.50% Training_loss_avg:0.648791
Epoch:3 Step:1320 Training_loss:0.599810, Acc_avg:60.25% Training_loss_avg:0.650406
Epoch:3 Step:1328 Training_loss:0.567312, Acc_avg:61.00% Training_loss_avg:0.645813
Epoch:3 Step:1336 Tr

52it [00:07,  6.60it/s]


Epoch:3 Step:1488 Val_loss:0.627169, Val_Acc_avg:59.25%
Epoch:3 Step:1496 Training_loss:0.781863, Acc_avg:61.00% Training_loss_avg:0.650562
Epoch:3 Step:1504 Training_loss:0.562683, Acc_avg:61.50% Training_loss_avg:0.649267
Epoch:3 Step:1512 Training_loss:0.673696, Acc_avg:62.00% Training_loss_avg:0.647635
Epoch:3 Step:1520 Training_loss:0.595887, Acc_avg:61.75% Training_loss_avg:0.647951
Epoch:3 Step:1528 Training_loss:0.658149, Acc_avg:61.50% Training_loss_avg:0.647863
Epoch:3 Step:1536 Training_loss:0.551224, Acc_avg:61.50% Training_loss_avg:0.643978
Epoch:3 Step:1544 Training_loss:0.643314, Acc_avg:61.25% Training_loss_avg:0.643824
Epoch:3 Step:1552 Training_loss:0.445071, Acc_avg:61.25% Training_loss_avg:0.643534
Epoch:3 Step:1560 Training_loss:0.562789, Acc_avg:61.50% Training_loss_avg:0.640627
Epoch:3 Step:1568 Training_loss:0.721469, Acc_avg:60.50% Training_loss_avg:0.644137
Epoch:3 Step:1576 Training_loss:0.687089, Acc_avg:61.25% Training_loss_avg:0.641678
Epoch:3 Step:1584 Tr

52it [00:07,  6.60it/s]


Epoch:3 Step:1736 Val_loss:0.632761, Val_Acc_avg:56.94%
Epoch:3 Step:1744 Training_loss:0.650237, Acc_avg:63.25% Training_loss_avg:0.636413
Epoch:3 Step:1752 Training_loss:0.541235, Acc_avg:63.50% Training_loss_avg:0.633384
Epoch:3 Step:1760 Training_loss:0.650001, Acc_avg:63.25% Training_loss_avg:0.633919
Epoch:3 Step:1768 Training_loss:0.600557, Acc_avg:62.75% Training_loss_avg:0.635481
Epoch:3 Step:1776 Training_loss:0.870203, Acc_avg:62.50% Training_loss_avg:0.638353
Epoch:3 Step:1784 Training_loss:0.679926, Acc_avg:62.25% Training_loss_avg:0.637697
Epoch:3 Step:1792 Training_loss:0.590901, Acc_avg:61.75% Training_loss_avg:0.637882
Epoch:3 Step:1800 Training_loss:0.614016, Acc_avg:62.25% Training_loss_avg:0.636576
Epoch:3 Step:1808 Training_loss:0.617713, Acc_avg:62.75% Training_loss_avg:0.631666
Epoch:3 Step:1816 Training_loss:0.705289, Acc_avg:62.00% Training_loss_avg:0.634075
Epoch:3 Step:1824 Training_loss:0.616651, Acc_avg:62.75% Training_loss_avg:0.633949
Epoch:3 Step:1832 Tr

52it [00:07,  6.61it/s]


Epoch:3 Step:1984 Val_loss:0.626424, Val_Acc_avg:56.94%
Epoch:3 Step:1992 Training_loss:0.723075, Acc_avg:61.75% Training_loss_avg:0.641889
Epoch:3 Step:2000 Training_loss:0.490633, Acc_avg:62.00% Training_loss_avg:0.640318
Epoch:3 Step:2008 Training_loss:0.557686, Acc_avg:61.75% Training_loss_avg:0.641846
Epoch:3 Step:2016 Training_loss:0.426130, Acc_avg:62.25% Training_loss_avg:0.636705
Epoch:3 Step:2024 Training_loss:0.453026, Acc_avg:62.50% Training_loss_avg:0.630228
Epoch:3 Step:2032 Training_loss:0.610799, Acc_avg:62.25% Training_loss_avg:0.631666
Epoch:3 Step:2040 Training_loss:0.626678, Acc_avg:62.50% Training_loss_avg:0.626687
Epoch:3 Step:2048 Training_loss:0.571133, Acc_avg:62.25% Training_loss_avg:0.631234
Epoch:3 Step:2056 Training_loss:0.549217, Acc_avg:62.75% Training_loss_avg:0.630152
Epoch:3 Step:2064 Training_loss:0.647904, Acc_avg:62.75% Training_loss_avg:0.630299
Epoch:3 Step:2072 Training_loss:0.658542, Acc_avg:62.75% Training_loss_avg:0.629179
Epoch:3 Step:2080 Tr

52it [00:07,  6.61it/s]


Epoch:3 Step:2232 Val_loss:0.627297, Val_Acc_avg:56.94%
Epoch:3 Step:2240 Training_loss:0.618817, Acc_avg:61.25% Training_loss_avg:0.635999
Epoch:3 Step:2248 Training_loss:0.633472, Acc_avg:61.25% Training_loss_avg:0.636355
Epoch:3 Step:2256 Training_loss:0.794508, Acc_avg:60.25% Training_loss_avg:0.640727
Epoch:3 Step:2264 Training_loss:0.615684, Acc_avg:61.00% Training_loss_avg:0.637379
Epoch:3 Step:2272 Training_loss:0.616279, Acc_avg:61.50% Training_loss_avg:0.635650
Epoch:3 Step:2280 Training_loss:0.606472, Acc_avg:61.50% Training_loss_avg:0.638187
Epoch:3 Step:2288 Training_loss:0.647400, Acc_avg:61.75% Training_loss_avg:0.636874
Epoch:3 Step:2296 Training_loss:0.534161, Acc_avg:62.00% Training_loss_avg:0.634657
Epoch:3 Step:2304 Training_loss:0.780064, Acc_avg:62.00% Training_loss_avg:0.636865
Epoch:3 Step:2312 Training_loss:0.673693, Acc_avg:62.00% Training_loss_avg:0.637314
Epoch:3 Step:2320 Training_loss:0.610009, Acc_avg:61.75% Training_loss_avg:0.637404
Epoch:3 Step:2328 Tr

52it [00:07,  6.60it/s]


Epoch:3 Step:2480 Val_loss:0.628276, Val_Acc_avg:54.62%
Epoch:3 Step:2488 Training_loss:0.598004, Acc_avg:61.00% Training_loss_avg:0.643988
Epoch:3 Step:2496 Training_loss:0.649860, Acc_avg:62.00% Training_loss_avg:0.641292
Epoch:3 Step:2504 Training_loss:0.766196, Acc_avg:61.50% Training_loss_avg:0.644430
Epoch:3 Step:2512 Training_loss:0.705452, Acc_avg:61.75% Training_loss_avg:0.644866
Epoch:3 Step:2520 Training_loss:0.623509, Acc_avg:62.75% Training_loss_avg:0.642810
Epoch:3 Step:2528 Training_loss:0.659085, Acc_avg:62.75% Training_loss_avg:0.641996
Epoch:3 Step:2536 Training_loss:0.659636, Acc_avg:62.50% Training_loss_avg:0.644953
Epoch:3 Step:2544 Training_loss:0.486185, Acc_avg:62.75% Training_loss_avg:0.640984
Epoch:3 Step:2552 Training_loss:0.586233, Acc_avg:62.00% Training_loss_avg:0.642464
Epoch:3 Step:2560 Training_loss:0.787315, Acc_avg:62.25% Training_loss_avg:0.643679
Epoch:3 Step:2568 Training_loss:0.703718, Acc_avg:61.50% Training_loss_avg:0.648221
Epoch:3 Step:2576 Tr

52it [00:07,  6.60it/s]


Epoch:3 Step:2728 Val_loss:0.633125, Val_Acc_avg:54.62%
Epoch:3 Step:2736 Training_loss:0.669205, Acc_avg:60.25% Training_loss_avg:0.651881
Epoch:3 Step:2744 Training_loss:0.625318, Acc_avg:61.00% Training_loss_avg:0.649952
Epoch:3 Step:2752 Training_loss:0.619228, Acc_avg:61.25% Training_loss_avg:0.651708
Epoch:3 Step:2760 Training_loss:0.770529, Acc_avg:61.25% Training_loss_avg:0.652367
Epoch:3 Step:2768 Training_loss:0.694605, Acc_avg:61.00% Training_loss_avg:0.653005
Epoch:3 Step:2776 Training_loss:0.515949, Acc_avg:61.00% Training_loss_avg:0.651094
Epoch:3 Step:2784 Training_loss:0.589562, Acc_avg:61.50% Training_loss_avg:0.648926
Epoch:3 Step:2792 Training_loss:0.644998, Acc_avg:61.25% Training_loss_avg:0.650479
Epoch:3 Step:2800 Training_loss:0.803412, Acc_avg:60.25% Training_loss_avg:0.654764
Epoch:3 Step:2808 Training_loss:0.660920, Acc_avg:60.25% Training_loss_avg:0.654173
Epoch:3 Step:2816 Training_loss:0.642400, Acc_avg:60.25% Training_loss_avg:0.656230
Epoch:3 Step:2824 Tr

52it [00:07,  6.60it/s]


Epoch:3 Step:2976 Val_loss:0.649348, Val_Acc_avg:50.00%
Epoch:3 Step:2984 Training_loss:0.684382, Acc_avg:60.50% Training_loss_avg:0.647470
Epoch:3 Step:2992 Training_loss:0.670013, Acc_avg:60.50% Training_loss_avg:0.646814
Epoch:3 Step:3000 Training_loss:0.559202, Acc_avg:60.50% Training_loss_avg:0.643755
Epoch:3 Step:3008 Training_loss:0.677357, Acc_avg:60.50% Training_loss_avg:0.644879
Epoch:3 Step:3016 Training_loss:0.534217, Acc_avg:61.50% Training_loss_avg:0.642385
Epoch:3 Step:3024 Training_loss:0.651867, Acc_avg:61.50% Training_loss_avg:0.642133
Epoch:3 Step:3032 Training_loss:0.620360, Acc_avg:61.50% Training_loss_avg:0.642140
Epoch:3 Step:3040 Training_loss:0.607032, Acc_avg:62.25% Training_loss_avg:0.639335
Epoch:3 Step:3048 Training_loss:0.492564, Acc_avg:62.25% Training_loss_avg:0.637122
Epoch:3 Step:3056 Training_loss:0.724895, Acc_avg:61.75% Training_loss_avg:0.639522
Epoch:3 Step:3064 Training_loss:0.699047, Acc_avg:61.75% Training_loss_avg:0.638894
Epoch:3 Step:3072 Tr

52it [00:07,  6.60it/s]


Epoch:3 Step:3224 Val_loss:0.629093, Val_Acc_avg:59.25%
Epoch:3 Step:3232 Training_loss:0.659565, Acc_avg:62.25% Training_loss_avg:0.630484
Epoch:3 Step:3240 Training_loss:0.587900, Acc_avg:63.00% Training_loss_avg:0.626262
Epoch:3 Step:3248 Training_loss:0.862255, Acc_avg:61.75% Training_loss_avg:0.632554
Epoch:3 Step:3256 Training_loss:0.609828, Acc_avg:61.50% Training_loss_avg:0.632069
Epoch:3 Step:3264 Training_loss:0.571670, Acc_avg:61.50% Training_loss_avg:0.631876
Epoch:3 Step:3272 Training_loss:0.545824, Acc_avg:62.00% Training_loss_avg:0.630449
Epoch:3 Step:3280 Training_loss:0.667184, Acc_avg:62.50% Training_loss_avg:0.628628
Epoch:3 Step:3288 Training_loss:0.648948, Acc_avg:62.75% Training_loss_avg:0.628509
Epoch:3 Step:3296 Training_loss:0.577635, Acc_avg:62.75% Training_loss_avg:0.627725
Epoch:3 Step:3304 Training_loss:0.641431, Acc_avg:62.00% Training_loss_avg:0.629100
Epoch:3 Step:3312 Training_loss:0.528721, Acc_avg:62.50% Training_loss_avg:0.627884
Epoch:3 Step:3320 Tr

52it [00:07,  6.60it/s]


Epoch:4 Step:0 Val_loss:0.625220, Val_Acc_avg:54.62%
Epoch:4 Step:8 Training_loss:0.744044, Acc_avg:62.58% Training_loss_avg:0.629204
Epoch:4 Step:16 Training_loss:0.614079, Acc_avg:63.08% Training_loss_avg:0.628074
Epoch:4 Step:24 Training_loss:0.558185, Acc_avg:63.83% Training_loss_avg:0.626049
Epoch:4 Step:32 Training_loss:0.594127, Acc_avg:63.83% Training_loss_avg:0.628258
Epoch:4 Step:40 Training_loss:0.666764, Acc_avg:63.83% Training_loss_avg:0.631135
Epoch:4 Step:48 Training_loss:0.732028, Acc_avg:63.58% Training_loss_avg:0.632087
Epoch:4 Step:56 Training_loss:0.866681, Acc_avg:62.83% Training_loss_avg:0.636021
Epoch:4 Step:64 Training_loss:0.675988, Acc_avg:62.83% Training_loss_avg:0.638357
Epoch:4 Step:72 Training_loss:0.726417, Acc_avg:62.83% Training_loss_avg:0.639338
Epoch:4 Step:80 Training_loss:0.651027, Acc_avg:62.58% Training_loss_avg:0.641674
Epoch:4 Step:88 Training_loss:0.572189, Acc_avg:62.58% Training_loss_avg:0.640080
Epoch:4 Step:96 Training_loss:0.616886, Acc_av

52it [00:07,  6.60it/s]


Epoch:4 Step:248 Val_loss:0.629943, Val_Acc_avg:56.94%
Epoch:4 Step:256 Training_loss:0.635005, Acc_avg:64.58% Training_loss_avg:0.623995
Epoch:4 Step:264 Training_loss:0.714012, Acc_avg:64.58% Training_loss_avg:0.623508
Epoch:4 Step:272 Training_loss:0.655095, Acc_avg:64.33% Training_loss_avg:0.621559
Epoch:4 Step:280 Training_loss:0.667710, Acc_avg:63.58% Training_loss_avg:0.624694
Epoch:4 Step:288 Training_loss:0.630584, Acc_avg:62.83% Training_loss_avg:0.627923
Epoch:4 Step:296 Training_loss:0.485048, Acc_avg:63.33% Training_loss_avg:0.624433
Epoch:4 Step:304 Training_loss:0.654347, Acc_avg:62.83% Training_loss_avg:0.625762
Epoch:4 Step:312 Training_loss:0.570450, Acc_avg:63.58% Training_loss_avg:0.619926
Epoch:4 Step:320 Training_loss:0.454538, Acc_avg:64.08% Training_loss_avg:0.616820
Epoch:4 Step:328 Training_loss:0.550223, Acc_avg:64.08% Training_loss_avg:0.616391
Epoch:4 Step:336 Training_loss:0.689337, Acc_avg:63.58% Training_loss_avg:0.619262
Epoch:4 Step:344 Training_loss:0

52it [00:07,  6.60it/s]


Epoch:4 Step:496 Val_loss:0.644471, Val_Acc_avg:52.31%
Epoch:4 Step:504 Training_loss:0.704720, Acc_avg:63.50% Training_loss_avg:0.616501
Epoch:4 Step:512 Training_loss:0.594376, Acc_avg:64.25% Training_loss_avg:0.615448
Epoch:4 Step:520 Training_loss:0.639797, Acc_avg:64.25% Training_loss_avg:0.615997
Epoch:4 Step:528 Training_loss:0.486482, Acc_avg:64.00% Training_loss_avg:0.616495
Epoch:4 Step:536 Training_loss:0.640480, Acc_avg:63.75% Training_loss_avg:0.615915
Epoch:4 Step:544 Training_loss:0.647321, Acc_avg:64.25% Training_loss_avg:0.614183
Epoch:4 Step:552 Training_loss:0.570093, Acc_avg:65.25% Training_loss_avg:0.610880
Epoch:4 Step:560 Training_loss:0.633480, Acc_avg:64.75% Training_loss_avg:0.611792
Epoch:4 Step:568 Training_loss:0.582731, Acc_avg:64.75% Training_loss_avg:0.611298
Epoch:4 Step:576 Training_loss:0.426428, Acc_avg:64.75% Training_loss_avg:0.609746
Epoch:4 Step:584 Training_loss:0.665245, Acc_avg:65.25% Training_loss_avg:0.610227
Epoch:4 Step:592 Training_loss:0

52it [00:07,  6.60it/s]


Epoch:4 Step:744 Val_loss:0.630155, Val_Acc_avg:56.94%
Epoch:4 Step:752 Training_loss:0.481786, Acc_avg:65.00% Training_loss_avg:0.624627
Epoch:4 Step:760 Training_loss:0.775249, Acc_avg:65.00% Training_loss_avg:0.626886
Epoch:4 Step:768 Training_loss:0.541029, Acc_avg:65.25% Training_loss_avg:0.626905
Epoch:4 Step:776 Training_loss:0.724441, Acc_avg:65.25% Training_loss_avg:0.626418
Epoch:4 Step:784 Training_loss:0.776027, Acc_avg:65.00% Training_loss_avg:0.631022
Epoch:4 Step:792 Training_loss:0.781317, Acc_avg:65.25% Training_loss_avg:0.632300
Epoch:4 Step:800 Training_loss:0.642338, Acc_avg:65.25% Training_loss_avg:0.634951
Epoch:4 Step:808 Training_loss:0.696074, Acc_avg:65.75% Training_loss_avg:0.631291
Epoch:4 Step:816 Training_loss:0.417793, Acc_avg:66.50% Training_loss_avg:0.625506
Epoch:4 Step:824 Training_loss:0.540797, Acc_avg:67.00% Training_loss_avg:0.624519
Epoch:4 Step:832 Training_loss:0.640135, Acc_avg:66.75% Training_loss_avg:0.625794
Epoch:4 Step:840 Training_loss:0

52it [00:07,  6.60it/s]


Epoch:4 Step:992 Val_loss:0.640079, Val_Acc_avg:54.62%
Epoch:4 Step:1000 Training_loss:0.687087, Acc_avg:63.25% Training_loss_avg:0.632393
Epoch:4 Step:1008 Training_loss:0.632688, Acc_avg:63.25% Training_loss_avg:0.630394
Epoch:4 Step:1016 Training_loss:0.497075, Acc_avg:63.75% Training_loss_avg:0.627333
Epoch:4 Step:1024 Training_loss:0.687469, Acc_avg:63.50% Training_loss_avg:0.630217
Epoch:4 Step:1032 Training_loss:0.647467, Acc_avg:63.50% Training_loss_avg:0.629941
Epoch:4 Step:1040 Training_loss:0.756974, Acc_avg:63.50% Training_loss_avg:0.631895
Epoch:4 Step:1048 Training_loss:0.854717, Acc_avg:62.50% Training_loss_avg:0.637471
Epoch:4 Step:1056 Training_loss:0.774428, Acc_avg:62.50% Training_loss_avg:0.639023
Epoch:4 Step:1064 Training_loss:0.810015, Acc_avg:61.75% Training_loss_avg:0.645281
Epoch:4 Step:1072 Training_loss:0.673306, Acc_avg:61.25% Training_loss_avg:0.648957
Epoch:4 Step:1080 Training_loss:0.559796, Acc_avg:60.75% Training_loss_avg:0.650672
Epoch:4 Step:1088 Tra

52it [00:07,  6.61it/s]


Epoch:4 Step:1240 Val_loss:0.625533, Val_Acc_avg:54.62%
Epoch:4 Step:1248 Training_loss:0.586031, Acc_avg:58.00% Training_loss_avg:0.672269
Epoch:4 Step:1256 Training_loss:0.740721, Acc_avg:58.25% Training_loss_avg:0.670322
Epoch:4 Step:1264 Training_loss:0.499516, Acc_avg:58.75% Training_loss_avg:0.666496
Epoch:4 Step:1272 Training_loss:0.768067, Acc_avg:58.50% Training_loss_avg:0.664305
Epoch:4 Step:1280 Training_loss:0.623156, Acc_avg:59.00% Training_loss_avg:0.662568
Epoch:4 Step:1288 Training_loss:0.717023, Acc_avg:58.00% Training_loss_avg:0.665433
Epoch:4 Step:1296 Training_loss:0.473214, Acc_avg:58.50% Training_loss_avg:0.662442
Epoch:4 Step:1304 Training_loss:0.652642, Acc_avg:58.75% Training_loss_avg:0.662176
Epoch:4 Step:1312 Training_loss:0.647560, Acc_avg:58.75% Training_loss_avg:0.662903
Epoch:4 Step:1320 Training_loss:0.652988, Acc_avg:58.75% Training_loss_avg:0.660674
Epoch:4 Step:1328 Training_loss:0.617239, Acc_avg:58.75% Training_loss_avg:0.661088
Epoch:4 Step:1336 Tr

52it [00:07,  6.61it/s]


Epoch:4 Step:1488 Val_loss:0.620804, Val_Acc_avg:59.25%
Epoch:4 Step:1496 Training_loss:0.642233, Acc_avg:62.50% Training_loss_avg:0.639376
Epoch:4 Step:1504 Training_loss:0.684117, Acc_avg:62.00% Training_loss_avg:0.640723
Epoch:4 Step:1512 Training_loss:0.544397, Acc_avg:62.75% Training_loss_avg:0.635129
Epoch:4 Step:1520 Training_loss:0.587830, Acc_avg:62.75% Training_loss_avg:0.633495
Epoch:4 Step:1528 Training_loss:0.467794, Acc_avg:64.00% Training_loss_avg:0.627497
Epoch:4 Step:1536 Training_loss:0.413655, Acc_avg:64.50% Training_loss_avg:0.623965
Epoch:4 Step:1544 Training_loss:0.809768, Acc_avg:64.75% Training_loss_avg:0.622045
Epoch:4 Step:1552 Training_loss:0.863125, Acc_avg:63.75% Training_loss_avg:0.628136
Epoch:4 Step:1560 Training_loss:0.560297, Acc_avg:63.50% Training_loss_avg:0.629303
Epoch:4 Step:1568 Training_loss:0.645586, Acc_avg:63.75% Training_loss_avg:0.627800
Epoch:4 Step:1576 Training_loss:0.646050, Acc_avg:63.50% Training_loss_avg:0.627656
Epoch:4 Step:1584 Tr

52it [00:07,  6.60it/s]


Epoch:4 Step:1736 Val_loss:0.619428, Val_Acc_avg:59.25%
Epoch:4 Step:1744 Training_loss:0.572389, Acc_avg:66.25% Training_loss_avg:0.612424
Epoch:4 Step:1752 Training_loss:0.778973, Acc_avg:65.25% Training_loss_avg:0.616768
Epoch:4 Step:1760 Training_loss:0.578404, Acc_avg:65.25% Training_loss_avg:0.617064
Epoch:4 Step:1768 Training_loss:0.796921, Acc_avg:64.25% Training_loss_avg:0.624136
Epoch:4 Step:1776 Training_loss:0.627334, Acc_avg:63.75% Training_loss_avg:0.626884
Epoch:4 Step:1784 Training_loss:0.748116, Acc_avg:63.25% Training_loss_avg:0.630237
Epoch:4 Step:1792 Training_loss:0.499548, Acc_avg:63.25% Training_loss_avg:0.632079
Epoch:4 Step:1800 Training_loss:0.662942, Acc_avg:62.75% Training_loss_avg:0.634218
Epoch:4 Step:1808 Training_loss:0.628677, Acc_avg:63.00% Training_loss_avg:0.632030
Epoch:4 Step:1816 Training_loss:0.764484, Acc_avg:62.75% Training_loss_avg:0.633998
Epoch:4 Step:1824 Training_loss:0.535548, Acc_avg:63.50% Training_loss_avg:0.630778
Epoch:4 Step:1832 Tr

52it [00:07,  6.61it/s]


Epoch:4 Step:1984 Val_loss:0.634263, Val_Acc_avg:54.62%
Epoch:4 Step:1992 Training_loss:0.554777, Acc_avg:61.50% Training_loss_avg:0.630735
Epoch:4 Step:2000 Training_loss:0.688101, Acc_avg:61.25% Training_loss_avg:0.633129
Epoch:4 Step:2008 Training_loss:0.597983, Acc_avg:61.25% Training_loss_avg:0.634863
Epoch:4 Step:2016 Training_loss:0.582290, Acc_avg:62.00% Training_loss_avg:0.631740
Epoch:4 Step:2024 Training_loss:0.611419, Acc_avg:62.00% Training_loss_avg:0.632139
Epoch:4 Step:2032 Training_loss:0.591976, Acc_avg:61.25% Training_loss_avg:0.634725
Epoch:4 Step:2040 Training_loss:0.650487, Acc_avg:61.50% Training_loss_avg:0.631215
Epoch:4 Step:2048 Training_loss:0.666684, Acc_avg:60.50% Training_loss_avg:0.634478
Epoch:4 Step:2056 Training_loss:0.650306, Acc_avg:61.00% Training_loss_avg:0.636122
Epoch:4 Step:2064 Training_loss:0.448618, Acc_avg:61.50% Training_loss_avg:0.631907
Epoch:4 Step:2072 Training_loss:0.630059, Acc_avg:61.50% Training_loss_avg:0.630609
Epoch:4 Step:2080 Tr

52it [00:07,  6.60it/s]


Epoch:4 Step:2232 Val_loss:0.630342, Val_Acc_avg:54.62%
Epoch:4 Step:2240 Training_loss:0.582721, Acc_avg:60.75% Training_loss_avg:0.632208
Epoch:4 Step:2248 Training_loss:0.660949, Acc_avg:60.50% Training_loss_avg:0.635509
Epoch:4 Step:2256 Training_loss:0.624139, Acc_avg:61.50% Training_loss_avg:0.634908
Epoch:4 Step:2264 Training_loss:0.664752, Acc_avg:60.75% Training_loss_avg:0.638382
Epoch:4 Step:2272 Training_loss:0.678478, Acc_avg:60.25% Training_loss_avg:0.641485
Epoch:4 Step:2280 Training_loss:0.637666, Acc_avg:61.00% Training_loss_avg:0.639474
Epoch:4 Step:2288 Training_loss:0.629964, Acc_avg:61.00% Training_loss_avg:0.640556
Epoch:4 Step:2296 Training_loss:0.564292, Acc_avg:60.75% Training_loss_avg:0.638761
Epoch:4 Step:2304 Training_loss:0.709254, Acc_avg:60.50% Training_loss_avg:0.639377
Epoch:4 Step:2312 Training_loss:0.427106, Acc_avg:61.25% Training_loss_avg:0.635023
Epoch:4 Step:2320 Training_loss:0.671453, Acc_avg:61.25% Training_loss_avg:0.635351
Epoch:4 Step:2328 Tr

52it [00:07,  6.60it/s]


Epoch:4 Step:2480 Val_loss:0.622901, Val_Acc_avg:52.31%
Epoch:4 Step:2488 Training_loss:0.754267, Acc_avg:62.00% Training_loss_avg:0.637375
Epoch:4 Step:2496 Training_loss:0.511870, Acc_avg:63.00% Training_loss_avg:0.635743
Epoch:4 Step:2504 Training_loss:0.527928, Acc_avg:62.75% Training_loss_avg:0.634932
Epoch:4 Step:2512 Training_loss:0.618084, Acc_avg:62.75% Training_loss_avg:0.634279
Epoch:4 Step:2520 Training_loss:0.737475, Acc_avg:63.00% Training_loss_avg:0.634968
Epoch:4 Step:2528 Training_loss:0.545147, Acc_avg:63.75% Training_loss_avg:0.632189
Epoch:4 Step:2536 Training_loss:0.485387, Acc_avg:63.50% Training_loss_avg:0.630323
Epoch:4 Step:2544 Training_loss:0.571330, Acc_avg:63.75% Training_loss_avg:0.629678
Epoch:4 Step:2552 Training_loss:0.476725, Acc_avg:63.50% Training_loss_avg:0.626567
Epoch:4 Step:2560 Training_loss:0.752570, Acc_avg:63.75% Training_loss_avg:0.626097
Epoch:4 Step:2568 Training_loss:0.671996, Acc_avg:63.00% Training_loss_avg:0.628457
Epoch:4 Step:2576 Tr

52it [00:07,  6.60it/s]


Epoch:4 Step:2728 Val_loss:0.633293, Val_Acc_avg:56.94%
Epoch:4 Step:2736 Training_loss:0.711903, Acc_avg:62.50% Training_loss_avg:0.632935
Epoch:4 Step:2744 Training_loss:0.426640, Acc_avg:62.75% Training_loss_avg:0.628876
Epoch:4 Step:2752 Training_loss:0.441044, Acc_avg:63.25% Training_loss_avg:0.624105
Epoch:4 Step:2760 Training_loss:0.670799, Acc_avg:64.00% Training_loss_avg:0.621994
Epoch:4 Step:2768 Training_loss:0.562493, Acc_avg:63.75% Training_loss_avg:0.620141
Epoch:4 Step:2776 Training_loss:0.639966, Acc_avg:64.00% Training_loss_avg:0.618254
Epoch:4 Step:2784 Training_loss:0.686932, Acc_avg:63.50% Training_loss_avg:0.620906
Epoch:4 Step:2792 Training_loss:0.509133, Acc_avg:63.50% Training_loss_avg:0.618637
Epoch:4 Step:2800 Training_loss:0.998151, Acc_avg:63.25% Training_loss_avg:0.625958
Epoch:4 Step:2808 Training_loss:0.774715, Acc_avg:63.00% Training_loss_avg:0.628046
Epoch:4 Step:2816 Training_loss:0.705545, Acc_avg:62.75% Training_loss_avg:0.628974
Epoch:4 Step:2824 Tr

52it [00:07,  6.61it/s]


Epoch:4 Step:2976 Val_loss:0.630250, Val_Acc_avg:56.94%
Epoch:4 Step:2984 Training_loss:0.640000, Acc_avg:62.75% Training_loss_avg:0.640714
Epoch:4 Step:2992 Training_loss:0.568964, Acc_avg:63.00% Training_loss_avg:0.640887
Epoch:4 Step:3000 Training_loss:0.693123, Acc_avg:63.00% Training_loss_avg:0.641536
Epoch:4 Step:3008 Training_loss:0.769695, Acc_avg:62.50% Training_loss_avg:0.645865
Epoch:4 Step:3016 Training_loss:0.524327, Acc_avg:62.75% Training_loss_avg:0.644458
Epoch:4 Step:3024 Training_loss:0.489376, Acc_avg:64.00% Training_loss_avg:0.639579
Epoch:4 Step:3032 Training_loss:0.615687, Acc_avg:64.50% Training_loss_avg:0.636949
Epoch:4 Step:3040 Training_loss:0.736089, Acc_avg:63.50% Training_loss_avg:0.639798
Epoch:4 Step:3048 Training_loss:0.581482, Acc_avg:64.00% Training_loss_avg:0.637355
Epoch:4 Step:3056 Training_loss:0.683434, Acc_avg:63.50% Training_loss_avg:0.636629
Epoch:4 Step:3064 Training_loss:0.647887, Acc_avg:63.25% Training_loss_avg:0.636883
Epoch:4 Step:3072 Tr

52it [00:07,  6.61it/s]


Epoch:4 Step:3224 Val_loss:0.642710, Val_Acc_avg:52.31%
Epoch:4 Step:3232 Training_loss:0.572043, Acc_avg:63.25% Training_loss_avg:0.629549
Epoch:4 Step:3240 Training_loss:0.609044, Acc_avg:63.25% Training_loss_avg:0.631257
Epoch:4 Step:3248 Training_loss:0.760223, Acc_avg:62.75% Training_loss_avg:0.633554
Epoch:4 Step:3256 Training_loss:0.718417, Acc_avg:62.50% Training_loss_avg:0.635204
Epoch:4 Step:3264 Training_loss:0.851812, Acc_avg:62.00% Training_loss_avg:0.639272
Epoch:4 Step:3272 Training_loss:0.612754, Acc_avg:61.75% Training_loss_avg:0.638601
Epoch:4 Step:3280 Training_loss:0.608137, Acc_avg:61.75% Training_loss_avg:0.637694
Epoch:4 Step:3288 Training_loss:0.757517, Acc_avg:60.75% Training_loss_avg:0.642164
Epoch:4 Step:3296 Training_loss:0.777500, Acc_avg:60.25% Training_loss_avg:0.644037
Epoch:4 Step:3304 Training_loss:0.600789, Acc_avg:60.25% Training_loss_avg:0.643774
Epoch:4 Step:3312 Training_loss:0.655846, Acc_avg:60.25% Training_loss_avg:0.642777
Epoch:4 Step:3320 Tr

In [24]:
def main():
    """
    Main configuration function for a given finetune run
    :return: None
    """

    run_name = "Softmax + CEL_5e-6 acc 3 good?"
    model_name = 'codebert-base'
    checkpoint_location = None
    online = False

    code_df = preprocess_data(file_loc='code_dataset.jsonl')
    train_data, val_data, test_data = tokenize(code_df, model_name=model_name)

    # Loading model from checkpoint if location provided
    if online:
        model = AutoModelForSequenceClassification.from_pretrained("microsoft/codebert-base")
    elif checkpoint_location is None:
        model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)
    else:
        model = AutoModelForSequenceClassification.from_pretrained(checkpoint_location)

    train(model=model,
          train_data=train_data,
          val_data=val_data,
          epochs=5,
          batch_size=8,
          learning_rate=5e-6,
          validate_per=250,
          run_name=run_name,
          run_descrption="Colab with Softmax + CrossEntropyLoss, lr=5e-6, validate per 250, batch 8, 5 epochs")


In [25]:
torch.cuda.empty_cache()
main()

Insecure code counts: 3729, Total code counts: 8000, Proportion 0.466125


Some weights of the model checkpoint at codebert-base were not used when initializing RobertaForSequenceClassification: ['pooler.dense.weight', 'pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at codebert-base and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for p

Epoch:0 Step:0 Training_loss:0.624135, Acc_avg:75.00% Training_loss_avg:0.624135
Validating:


52it [00:07,  6.59it/s]


Epoch:0 Step:0 Val_loss:0.683608, Val_Acc_avg:58.00%
Epoch:0 Step:8 Training_loss:0.650295, Acc_avg:68.75% Training_loss_avg:0.637215
Epoch:0 Step:16 Training_loss:0.696938, Acc_avg:66.67% Training_loss_avg:0.657123
Epoch:0 Step:24 Training_loss:0.658840, Acc_avg:62.50% Training_loss_avg:0.657552
Epoch:0 Step:32 Training_loss:0.643593, Acc_avg:62.50% Training_loss_avg:0.654760
Epoch:0 Step:40 Training_loss:0.615486, Acc_avg:64.58% Training_loss_avg:0.648215
Epoch:0 Step:48 Training_loss:0.602531, Acc_avg:67.86% Training_loss_avg:0.641688
Epoch:0 Step:56 Training_loss:0.658548, Acc_avg:67.19% Training_loss_avg:0.643796
Epoch:0 Step:64 Training_loss:0.693928, Acc_avg:65.28% Training_loss_avg:0.649366
Epoch:0 Step:72 Training_loss:0.572569, Acc_avg:66.25% Training_loss_avg:0.641686
Epoch:0 Step:80 Training_loss:0.602112, Acc_avg:67.05% Training_loss_avg:0.638089
Epoch:0 Step:88 Training_loss:0.542588, Acc_avg:68.75% Training_loss_avg:0.630130
Epoch:0 Step:96 Training_loss:0.542329, Acc_av

52it [00:07,  6.61it/s]


Epoch:0 Step:248 Val_loss:0.680112, Val_Acc_avg:56.00%
Epoch:0 Step:256 Training_loss:0.770665, Acc_avg:60.98% Training_loss_avg:0.680738
Epoch:0 Step:264 Training_loss:0.706674, Acc_avg:60.66% Training_loss_avg:0.681501
Epoch:0 Step:272 Training_loss:0.662347, Acc_avg:61.07% Training_loss_avg:0.680954
Epoch:0 Step:280 Training_loss:0.712489, Acc_avg:60.42% Training_loss_avg:0.681830
Epoch:0 Step:288 Training_loss:0.685709, Acc_avg:60.14% Training_loss_avg:0.681935
Epoch:0 Step:296 Training_loss:0.721559, Acc_avg:59.87% Training_loss_avg:0.682977
Epoch:0 Step:304 Training_loss:0.695100, Acc_avg:59.94% Training_loss_avg:0.683288
Epoch:0 Step:312 Training_loss:0.803164, Acc_avg:59.06% Training_loss_avg:0.686285
Epoch:0 Step:320 Training_loss:0.605570, Acc_avg:59.76% Training_loss_avg:0.684316
Epoch:0 Step:328 Training_loss:0.710420, Acc_avg:59.52% Training_loss_avg:0.684938
Epoch:0 Step:336 Training_loss:0.687236, Acc_avg:59.30% Training_loss_avg:0.684991
Epoch:0 Step:344 Training_loss:0

52it [00:07,  6.60it/s]


Epoch:0 Step:496 Val_loss:0.685043, Val_Acc_avg:50.00%
Epoch:0 Step:504 Training_loss:0.690335, Acc_avg:54.75% Training_loss_avg:0.700216
Epoch:0 Step:512 Training_loss:0.696110, Acc_avg:54.00% Training_loss_avg:0.703197
Epoch:0 Step:520 Training_loss:0.683729, Acc_avg:54.25% Training_loss_avg:0.701571
Epoch:0 Step:528 Training_loss:0.693707, Acc_avg:54.25% Training_loss_avg:0.700436
Epoch:0 Step:536 Training_loss:0.678705, Acc_avg:54.50% Training_loss_avg:0.699226
Epoch:0 Step:544 Training_loss:0.693911, Acc_avg:54.00% Training_loss_avg:0.699035
Epoch:0 Step:552 Training_loss:0.771102, Acc_avg:53.75% Training_loss_avg:0.699678
Epoch:0 Step:560 Training_loss:0.719531, Acc_avg:53.00% Training_loss_avg:0.702592
Epoch:0 Step:568 Training_loss:0.721723, Acc_avg:53.00% Training_loss_avg:0.701996
Epoch:0 Step:576 Training_loss:0.667562, Acc_avg:53.25% Training_loss_avg:0.701038
Epoch:0 Step:584 Training_loss:0.697605, Acc_avg:53.75% Training_loss_avg:0.695775
Epoch:0 Step:592 Training_loss:0

52it [00:07,  6.60it/s]


Epoch:0 Step:744 Val_loss:0.687672, Val_Acc_avg:50.00%
Epoch:0 Step:752 Training_loss:0.661102, Acc_avg:50.00% Training_loss_avg:0.695981
Epoch:0 Step:760 Training_loss:0.704365, Acc_avg:49.75% Training_loss_avg:0.695811
Epoch:0 Step:768 Training_loss:0.694276, Acc_avg:49.75% Training_loss_avg:0.697366
Epoch:0 Step:776 Training_loss:0.709024, Acc_avg:49.25% Training_loss_avg:0.698211
Epoch:0 Step:784 Training_loss:0.701040, Acc_avg:49.00% Training_loss_avg:0.697965
Epoch:0 Step:792 Training_loss:0.742547, Acc_avg:49.00% Training_loss_avg:0.698060
Epoch:0 Step:800 Training_loss:0.698553, Acc_avg:48.25% Training_loss_avg:0.699706
Epoch:0 Step:808 Training_loss:0.672132, Acc_avg:47.75% Training_loss_avg:0.699965
Epoch:0 Step:816 Training_loss:0.728066, Acc_avg:47.00% Training_loss_avg:0.700957
Epoch:0 Step:824 Training_loss:0.686228, Acc_avg:47.25% Training_loss_avg:0.700866
Epoch:0 Step:832 Training_loss:0.682774, Acc_avg:47.75% Training_loss_avg:0.699655
Epoch:0 Step:840 Training_loss:0

52it [00:07,  6.60it/s]


Epoch:0 Step:992 Val_loss:0.679191, Val_Acc_avg:58.00%
Epoch:0 Step:1000 Training_loss:0.795501, Acc_avg:50.25% Training_loss_avg:0.697812
Epoch:0 Step:1008 Training_loss:0.636549, Acc_avg:51.00% Training_loss_avg:0.696417
Epoch:0 Step:1016 Training_loss:0.781532, Acc_avg:50.75% Training_loss_avg:0.697894
Epoch:0 Step:1024 Training_loss:0.638510, Acc_avg:51.50% Training_loss_avg:0.695534
Epoch:0 Step:1032 Training_loss:0.704292, Acc_avg:51.50% Training_loss_avg:0.696440
Epoch:0 Step:1040 Training_loss:0.741603, Acc_avg:51.75% Training_loss_avg:0.696422
Epoch:0 Step:1048 Training_loss:0.659836, Acc_avg:51.75% Training_loss_avg:0.695703
Epoch:0 Step:1056 Training_loss:0.709616, Acc_avg:51.50% Training_loss_avg:0.696571
Epoch:0 Step:1064 Training_loss:0.583484, Acc_avg:51.75% Training_loss_avg:0.695387
Epoch:0 Step:1072 Training_loss:0.734499, Acc_avg:52.00% Training_loss_avg:0.695034
Epoch:0 Step:1080 Training_loss:0.702265, Acc_avg:52.25% Training_loss_avg:0.695673
Epoch:0 Step:1088 Tra

52it [00:07,  6.61it/s]


Epoch:0 Step:1240 Val_loss:0.683999, Val_Acc_avg:56.00%
Epoch:0 Step:1248 Training_loss:0.708312, Acc_avg:52.25% Training_loss_avg:0.697469
Epoch:0 Step:1256 Training_loss:0.698451, Acc_avg:52.00% Training_loss_avg:0.699528
Epoch:0 Step:1264 Training_loss:0.674368, Acc_avg:52.00% Training_loss_avg:0.699708
Epoch:0 Step:1272 Training_loss:0.689322, Acc_avg:51.50% Training_loss_avg:0.700574
Epoch:0 Step:1280 Training_loss:0.649749, Acc_avg:52.00% Training_loss_avg:0.699296
Epoch:0 Step:1288 Training_loss:0.688321, Acc_avg:51.75% Training_loss_avg:0.699457
Epoch:0 Step:1296 Training_loss:0.754807, Acc_avg:51.00% Training_loss_avg:0.700578
Epoch:0 Step:1304 Training_loss:0.677457, Acc_avg:51.25% Training_loss_avg:0.698869
Epoch:0 Step:1312 Training_loss:0.713383, Acc_avg:51.00% Training_loss_avg:0.699983
Epoch:0 Step:1320 Training_loss:0.686295, Acc_avg:50.75% Training_loss_avg:0.701475
Epoch:0 Step:1328 Training_loss:0.675615, Acc_avg:50.75% Training_loss_avg:0.700963
Epoch:0 Step:1336 Tr

52it [00:07,  6.60it/s]


Epoch:0 Step:1488 Val_loss:0.681537, Val_Acc_avg:56.00%
Epoch:0 Step:1496 Training_loss:0.706759, Acc_avg:51.00% Training_loss_avg:0.697480
Epoch:0 Step:1504 Training_loss:0.679849, Acc_avg:50.75% Training_loss_avg:0.697655
Epoch:0 Step:1512 Training_loss:0.659783, Acc_avg:50.75% Training_loss_avg:0.696530
Epoch:0 Step:1520 Training_loss:0.696043, Acc_avg:51.25% Training_loss_avg:0.695628
Epoch:0 Step:1528 Training_loss:0.719745, Acc_avg:50.50% Training_loss_avg:0.698005
Epoch:0 Step:1536 Training_loss:0.715756, Acc_avg:50.50% Training_loss_avg:0.697232
Epoch:0 Step:1544 Training_loss:0.731629, Acc_avg:50.50% Training_loss_avg:0.697338
Epoch:0 Step:1552 Training_loss:0.682797, Acc_avg:51.25% Training_loss_avg:0.695804
Epoch:0 Step:1560 Training_loss:0.663552, Acc_avg:51.50% Training_loss_avg:0.695712
Epoch:0 Step:1568 Training_loss:0.674497, Acc_avg:51.50% Training_loss_avg:0.695822
Epoch:0 Step:1576 Training_loss:0.621093, Acc_avg:52.25% Training_loss_avg:0.693899
Epoch:0 Step:1584 Tr

52it [00:07,  6.61it/s]


Epoch:0 Step:1736 Val_loss:0.678306, Val_Acc_avg:58.00%
Epoch:0 Step:1744 Training_loss:0.779880, Acc_avg:53.75% Training_loss_avg:0.691987
Epoch:0 Step:1752 Training_loss:0.689600, Acc_avg:54.25% Training_loss_avg:0.691679
Epoch:0 Step:1760 Training_loss:0.733140, Acc_avg:54.50% Training_loss_avg:0.691391
Epoch:0 Step:1768 Training_loss:0.800727, Acc_avg:54.00% Training_loss_avg:0.693820
Epoch:0 Step:1776 Training_loss:0.766003, Acc_avg:54.00% Training_loss_avg:0.694712
Epoch:0 Step:1784 Training_loss:0.753635, Acc_avg:53.75% Training_loss_avg:0.696563
Epoch:0 Step:1792 Training_loss:0.687685, Acc_avg:54.00% Training_loss_avg:0.695676
Epoch:0 Step:1800 Training_loss:0.777971, Acc_avg:53.50% Training_loss_avg:0.697620
Epoch:0 Step:1808 Training_loss:0.699054, Acc_avg:53.00% Training_loss_avg:0.698530
Epoch:0 Step:1816 Training_loss:0.732111, Acc_avg:52.75% Training_loss_avg:0.699756
Epoch:0 Step:1824 Training_loss:0.607270, Acc_avg:53.50% Training_loss_avg:0.697454
Epoch:0 Step:1832 Tr

52it [00:07,  6.61it/s]


Epoch:0 Step:1984 Val_loss:0.684305, Val_Acc_avg:54.00%
Epoch:0 Step:1992 Training_loss:0.697175, Acc_avg:52.75% Training_loss_avg:0.699168
Epoch:0 Step:2000 Training_loss:0.724545, Acc_avg:52.75% Training_loss_avg:0.698699
Epoch:0 Step:2008 Training_loss:0.749952, Acc_avg:52.00% Training_loss_avg:0.699255
Epoch:0 Step:2016 Training_loss:0.751934, Acc_avg:51.00% Training_loss_avg:0.700914
Epoch:0 Step:2024 Training_loss:0.751000, Acc_avg:50.25% Training_loss_avg:0.703151
Epoch:0 Step:2032 Training_loss:0.695019, Acc_avg:50.00% Training_loss_avg:0.701887
Epoch:0 Step:2040 Training_loss:0.686285, Acc_avg:49.75% Training_loss_avg:0.703456
Epoch:0 Step:2048 Training_loss:0.675537, Acc_avg:49.75% Training_loss_avg:0.703437
Epoch:0 Step:2056 Training_loss:0.704538, Acc_avg:50.50% Training_loss_avg:0.702464
Epoch:0 Step:2064 Training_loss:0.662984, Acc_avg:50.75% Training_loss_avg:0.701808
Epoch:0 Step:2072 Training_loss:0.680114, Acc_avg:50.75% Training_loss_avg:0.702391
Epoch:0 Step:2080 Tr

52it [00:07,  6.60it/s]


Epoch:0 Step:2232 Val_loss:0.691218, Val_Acc_avg:50.00%
Epoch:0 Step:2240 Training_loss:0.695827, Acc_avg:52.00% Training_loss_avg:0.693346
Epoch:0 Step:2248 Training_loss:0.706299, Acc_avg:51.25% Training_loss_avg:0.694345
Epoch:0 Step:2256 Training_loss:0.704832, Acc_avg:51.00% Training_loss_avg:0.694072
Epoch:0 Step:2264 Training_loss:0.728139, Acc_avg:51.25% Training_loss_avg:0.694115
Epoch:0 Step:2272 Training_loss:0.639662, Acc_avg:52.00% Training_loss_avg:0.692746
Epoch:0 Step:2280 Training_loss:0.733537, Acc_avg:51.25% Training_loss_avg:0.694206
Epoch:0 Step:2288 Training_loss:0.655031, Acc_avg:51.25% Training_loss_avg:0.693466
Epoch:0 Step:2296 Training_loss:0.697527, Acc_avg:51.00% Training_loss_avg:0.693762
Epoch:0 Step:2304 Training_loss:0.662824, Acc_avg:51.25% Training_loss_avg:0.693452
Epoch:0 Step:2312 Training_loss:0.659902, Acc_avg:51.00% Training_loss_avg:0.693646
Epoch:0 Step:2320 Training_loss:0.686111, Acc_avg:50.75% Training_loss_avg:0.694769
Epoch:0 Step:2328 Tr

52it [00:07,  6.61it/s]


Epoch:0 Step:2480 Val_loss:0.678701, Val_Acc_avg:58.00%
Epoch:0 Step:2488 Training_loss:0.625105, Acc_avg:55.75% Training_loss_avg:0.690150
Epoch:0 Step:2496 Training_loss:0.716183, Acc_avg:55.75% Training_loss_avg:0.689975
Epoch:0 Step:2504 Training_loss:0.630045, Acc_avg:56.00% Training_loss_avg:0.689213
Epoch:0 Step:2512 Training_loss:0.586996, Acc_avg:56.50% Training_loss_avg:0.686163
Epoch:0 Step:2520 Training_loss:0.760000, Acc_avg:56.00% Training_loss_avg:0.688205
Epoch:0 Step:2528 Training_loss:0.715796, Acc_avg:56.50% Training_loss_avg:0.687933
Epoch:0 Step:2536 Training_loss:0.690668, Acc_avg:56.25% Training_loss_avg:0.688000
Epoch:0 Step:2544 Training_loss:0.726076, Acc_avg:56.00% Training_loss_avg:0.688472
Epoch:0 Step:2552 Training_loss:0.705113, Acc_avg:56.50% Training_loss_avg:0.688397
Epoch:0 Step:2560 Training_loss:0.797130, Acc_avg:56.50% Training_loss_avg:0.689743
Epoch:0 Step:2568 Training_loss:0.708430, Acc_avg:56.75% Training_loss_avg:0.689293
Epoch:0 Step:2576 Tr

52it [00:07,  6.61it/s]


Epoch:0 Step:2728 Val_loss:0.678243, Val_Acc_avg:58.00%
Epoch:0 Step:2736 Training_loss:0.661609, Acc_avg:56.25% Training_loss_avg:0.688668
Epoch:0 Step:2744 Training_loss:0.671255, Acc_avg:55.50% Training_loss_avg:0.690261
Epoch:0 Step:2752 Training_loss:0.712522, Acc_avg:55.50% Training_loss_avg:0.690485
Epoch:0 Step:2760 Training_loss:0.658475, Acc_avg:55.75% Training_loss_avg:0.689001
Epoch:0 Step:2768 Training_loss:0.618445, Acc_avg:56.00% Training_loss_avg:0.689035
Epoch:0 Step:2776 Training_loss:0.700710, Acc_avg:55.50% Training_loss_avg:0.691375
Epoch:0 Step:2784 Training_loss:0.674972, Acc_avg:55.75% Training_loss_avg:0.689795
Epoch:0 Step:2792 Training_loss:0.709919, Acc_avg:54.75% Training_loss_avg:0.691778
Epoch:0 Step:2800 Training_loss:0.659484, Acc_avg:54.75% Training_loss_avg:0.691428
Epoch:0 Step:2808 Training_loss:0.748234, Acc_avg:54.25% Training_loss_avg:0.694750
Epoch:0 Step:2816 Training_loss:0.739063, Acc_avg:53.75% Training_loss_avg:0.695155
Epoch:0 Step:2824 Tr

52it [00:07,  6.60it/s]


Epoch:0 Step:2976 Val_loss:0.675806, Val_Acc_avg:56.00%
Epoch:0 Step:2984 Training_loss:0.743009, Acc_avg:55.00% Training_loss_avg:0.687577
Epoch:0 Step:2992 Training_loss:0.673597, Acc_avg:55.00% Training_loss_avg:0.688695
Epoch:0 Step:3000 Training_loss:0.667579, Acc_avg:55.25% Training_loss_avg:0.686747
Epoch:0 Step:3008 Training_loss:0.601250, Acc_avg:54.75% Training_loss_avg:0.686765
Epoch:0 Step:3016 Training_loss:0.666080, Acc_avg:54.75% Training_loss_avg:0.686394
Epoch:0 Step:3024 Training_loss:0.833701, Acc_avg:54.75% Training_loss_avg:0.687686
Epoch:0 Step:3032 Training_loss:0.670953, Acc_avg:54.50% Training_loss_avg:0.688238
Epoch:0 Step:3040 Training_loss:0.679020, Acc_avg:54.25% Training_loss_avg:0.688813
Epoch:0 Step:3048 Training_loss:0.657554, Acc_avg:54.75% Training_loss_avg:0.687598
Epoch:0 Step:3056 Training_loss:0.702115, Acc_avg:54.25% Training_loss_avg:0.689400
Epoch:0 Step:3064 Training_loss:0.626259, Acc_avg:54.75% Training_loss_avg:0.687491
Epoch:0 Step:3072 Tr

52it [00:07,  6.61it/s]


Epoch:0 Step:3224 Val_loss:0.681904, Val_Acc_avg:50.00%
Epoch:0 Step:3232 Training_loss:0.661412, Acc_avg:53.75% Training_loss_avg:0.691504
Epoch:0 Step:3240 Training_loss:0.693889, Acc_avg:53.75% Training_loss_avg:0.692716
Epoch:0 Step:3248 Training_loss:0.701719, Acc_avg:53.50% Training_loss_avg:0.692251
Epoch:0 Step:3256 Training_loss:0.691954, Acc_avg:54.00% Training_loss_avg:0.690708
Epoch:0 Step:3264 Training_loss:0.679477, Acc_avg:54.75% Training_loss_avg:0.689773
Epoch:0 Step:3272 Training_loss:0.743556, Acc_avg:54.00% Training_loss_avg:0.690390
Epoch:0 Step:3280 Training_loss:0.680010, Acc_avg:54.50% Training_loss_avg:0.690056
Epoch:0 Step:3288 Training_loss:0.663067, Acc_avg:54.50% Training_loss_avg:0.690267
Epoch:0 Step:3296 Training_loss:0.616039, Acc_avg:55.25% Training_loss_avg:0.689708
Epoch:0 Step:3304 Training_loss:0.692389, Acc_avg:55.00% Training_loss_avg:0.689377
Epoch:0 Step:3312 Training_loss:0.702165, Acc_avg:55.25% Training_loss_avg:0.688736
Epoch:0 Step:3320 Tr

52it [00:07,  6.60it/s]


Epoch:1 Step:0 Val_loss:0.678708, Val_Acc_avg:54.00%
Epoch:1 Step:8 Training_loss:0.721276, Acc_avg:55.33% Training_loss_avg:0.692027
Epoch:1 Step:16 Training_loss:0.681346, Acc_avg:55.08% Training_loss_avg:0.692335
Epoch:1 Step:24 Training_loss:0.792421, Acc_avg:54.58% Training_loss_avg:0.695114
Epoch:1 Step:32 Training_loss:0.676067, Acc_avg:54.83% Training_loss_avg:0.694706
Epoch:1 Step:40 Training_loss:0.809583, Acc_avg:54.08% Training_loss_avg:0.697218
Epoch:1 Step:48 Training_loss:0.786921, Acc_avg:54.08% Training_loss_avg:0.698097
Epoch:1 Step:56 Training_loss:0.613071, Acc_avg:54.33% Training_loss_avg:0.696886
Epoch:1 Step:64 Training_loss:0.695493, Acc_avg:54.58% Training_loss_avg:0.697444
Epoch:1 Step:72 Training_loss:0.639647, Acc_avg:54.58% Training_loss_avg:0.698212
Epoch:1 Step:80 Training_loss:0.706604, Acc_avg:54.33% Training_loss_avg:0.699023
Epoch:1 Step:88 Training_loss:0.681638, Acc_avg:55.33% Training_loss_avg:0.695982
Epoch:1 Step:96 Training_loss:0.637673, Acc_av

52it [00:07,  6.61it/s]


Epoch:1 Step:248 Val_loss:0.685116, Val_Acc_avg:56.00%
Epoch:1 Step:256 Training_loss:0.750651, Acc_avg:53.58% Training_loss_avg:0.701121
Epoch:1 Step:264 Training_loss:0.636281, Acc_avg:54.08% Training_loss_avg:0.699844
Epoch:1 Step:272 Training_loss:0.675866, Acc_avg:54.58% Training_loss_avg:0.698463
Epoch:1 Step:280 Training_loss:0.693916, Acc_avg:53.83% Training_loss_avg:0.699476
Epoch:1 Step:288 Training_loss:0.695317, Acc_avg:53.83% Training_loss_avg:0.699510
Epoch:1 Step:296 Training_loss:0.702424, Acc_avg:53.08% Training_loss_avg:0.700330
Epoch:1 Step:304 Training_loss:0.670668, Acc_avg:52.83% Training_loss_avg:0.699866
Epoch:1 Step:312 Training_loss:0.695245, Acc_avg:52.83% Training_loss_avg:0.699736
Epoch:1 Step:320 Training_loss:0.696597, Acc_avg:52.83% Training_loss_avg:0.699829
Epoch:1 Step:328 Training_loss:0.698755, Acc_avg:52.33% Training_loss_avg:0.700215
Epoch:1 Step:336 Training_loss:0.665143, Acc_avg:53.33% Training_loss_avg:0.698647
Epoch:1 Step:344 Training_loss:0

52it [00:07,  6.61it/s]


Epoch:1 Step:496 Val_loss:0.690152, Val_Acc_avg:54.00%
Epoch:1 Step:504 Training_loss:0.727336, Acc_avg:50.75% Training_loss_avg:0.696290
Epoch:1 Step:512 Training_loss:0.688945, Acc_avg:50.50% Training_loss_avg:0.696054
Epoch:1 Step:520 Training_loss:0.661748, Acc_avg:50.75% Training_loss_avg:0.695152
Epoch:1 Step:528 Training_loss:0.661928, Acc_avg:51.25% Training_loss_avg:0.694974
Epoch:1 Step:536 Training_loss:0.711448, Acc_avg:51.50% Training_loss_avg:0.694778
Epoch:1 Step:544 Training_loss:0.701978, Acc_avg:51.50% Training_loss_avg:0.692911
Epoch:1 Step:552 Training_loss:0.675959, Acc_avg:51.50% Training_loss_avg:0.691101
Epoch:1 Step:560 Training_loss:0.696941, Acc_avg:50.75% Training_loss_avg:0.691662
Epoch:1 Step:568 Training_loss:0.647436, Acc_avg:51.50% Training_loss_avg:0.690236
Epoch:1 Step:576 Training_loss:0.674580, Acc_avg:51.75% Training_loss_avg:0.690315
Epoch:1 Step:584 Training_loss:0.647123, Acc_avg:52.75% Training_loss_avg:0.688561
Epoch:1 Step:592 Training_loss:0

52it [00:07,  6.61it/s]


Epoch:1 Step:744 Val_loss:0.675323, Val_Acc_avg:58.00%
Epoch:1 Step:752 Training_loss:0.711363, Acc_avg:56.00% Training_loss_avg:0.684836
Epoch:1 Step:760 Training_loss:0.690406, Acc_avg:56.50% Training_loss_avg:0.684773
Epoch:1 Step:768 Training_loss:0.686069, Acc_avg:56.50% Training_loss_avg:0.685264
Epoch:1 Step:776 Training_loss:0.810539, Acc_avg:56.00% Training_loss_avg:0.688322
Epoch:1 Step:784 Training_loss:0.733736, Acc_avg:55.25% Training_loss_avg:0.689673
Epoch:1 Step:792 Training_loss:0.539272, Acc_avg:55.75% Training_loss_avg:0.686574
Epoch:1 Step:800 Training_loss:0.703040, Acc_avg:55.25% Training_loss_avg:0.687610
Epoch:1 Step:808 Training_loss:0.695647, Acc_avg:55.25% Training_loss_avg:0.687519
Epoch:1 Step:816 Training_loss:0.659275, Acc_avg:55.00% Training_loss_avg:0.687013
Epoch:1 Step:824 Training_loss:0.704318, Acc_avg:55.25% Training_loss_avg:0.686125
Epoch:1 Step:832 Training_loss:0.692464, Acc_avg:55.50% Training_loss_avg:0.686446
Epoch:1 Step:840 Training_loss:0

52it [00:07,  6.60it/s]


Epoch:1 Step:992 Val_loss:0.673121, Val_Acc_avg:58.00%
Epoch:1 Step:1000 Training_loss:0.762662, Acc_avg:58.75% Training_loss_avg:0.682352
Epoch:1 Step:1008 Training_loss:0.659785, Acc_avg:58.00% Training_loss_avg:0.682284
Epoch:1 Step:1016 Training_loss:0.624504, Acc_avg:58.25% Training_loss_avg:0.680902
Epoch:1 Step:1024 Training_loss:0.779271, Acc_avg:57.75% Training_loss_avg:0.682548
Epoch:1 Step:1032 Training_loss:0.701604, Acc_avg:56.75% Training_loss_avg:0.684005
Epoch:1 Step:1040 Training_loss:0.688848, Acc_avg:57.50% Training_loss_avg:0.682780
Epoch:1 Step:1048 Training_loss:0.684968, Acc_avg:57.25% Training_loss_avg:0.683247
Epoch:1 Step:1056 Training_loss:0.679913, Acc_avg:57.50% Training_loss_avg:0.682556
Epoch:1 Step:1064 Training_loss:0.580812, Acc_avg:58.00% Training_loss_avg:0.679364
Epoch:1 Step:1072 Training_loss:0.673546, Acc_avg:58.75% Training_loss_avg:0.678332
Epoch:1 Step:1080 Training_loss:0.631077, Acc_avg:59.00% Training_loss_avg:0.676442
Epoch:1 Step:1088 Tra

52it [00:07,  6.60it/s]


Epoch:1 Step:1240 Val_loss:0.668558, Val_Acc_avg:54.00%
Epoch:1 Step:1248 Training_loss:0.648689, Acc_avg:58.00% Training_loss_avg:0.682336
Epoch:1 Step:1256 Training_loss:0.686144, Acc_avg:58.00% Training_loss_avg:0.682189
Epoch:1 Step:1264 Training_loss:0.713245, Acc_avg:57.50% Training_loss_avg:0.682721
Epoch:1 Step:1272 Training_loss:0.633933, Acc_avg:57.25% Training_loss_avg:0.683321
Epoch:1 Step:1280 Training_loss:0.721177, Acc_avg:56.50% Training_loss_avg:0.685304
Epoch:1 Step:1288 Training_loss:0.747122, Acc_avg:55.75% Training_loss_avg:0.686568
Epoch:1 Step:1296 Training_loss:0.700510, Acc_avg:55.75% Training_loss_avg:0.686752
Epoch:1 Step:1304 Training_loss:0.704112, Acc_avg:56.00% Training_loss_avg:0.685384
Epoch:1 Step:1312 Training_loss:0.647800, Acc_avg:55.75% Training_loss_avg:0.686689
Epoch:1 Step:1320 Training_loss:0.780902, Acc_avg:55.25% Training_loss_avg:0.686684
Epoch:1 Step:1328 Training_loss:0.781443, Acc_avg:54.25% Training_loss_avg:0.690349
Epoch:1 Step:1336 Tr

52it [00:07,  6.61it/s]


Epoch:1 Step:1488 Val_loss:0.668494, Val_Acc_avg:56.00%
Epoch:1 Step:1496 Training_loss:0.714968, Acc_avg:56.25% Training_loss_avg:0.686316
Epoch:1 Step:1504 Training_loss:0.601130, Acc_avg:56.50% Training_loss_avg:0.686211
Epoch:1 Step:1512 Training_loss:0.770984, Acc_avg:56.25% Training_loss_avg:0.688055
Epoch:1 Step:1520 Training_loss:0.833750, Acc_avg:56.00% Training_loss_avg:0.688306
Epoch:1 Step:1528 Training_loss:0.710158, Acc_avg:55.75% Training_loss_avg:0.688494
Epoch:1 Step:1536 Training_loss:0.666130, Acc_avg:56.75% Training_loss_avg:0.687015
Epoch:1 Step:1544 Training_loss:0.605894, Acc_avg:57.25% Training_loss_avg:0.685273
Epoch:1 Step:1552 Training_loss:0.685551, Acc_avg:57.25% Training_loss_avg:0.685112
Epoch:1 Step:1560 Training_loss:0.739522, Acc_avg:56.75% Training_loss_avg:0.684585
Epoch:1 Step:1568 Training_loss:0.644799, Acc_avg:57.25% Training_loss_avg:0.683287
Epoch:1 Step:1576 Training_loss:0.767965, Acc_avg:57.00% Training_loss_avg:0.685321
Epoch:1 Step:1584 Tr

52it [00:07,  6.61it/s]


Epoch:1 Step:1736 Val_loss:0.666610, Val_Acc_avg:54.00%
Epoch:1 Step:1744 Training_loss:0.715430, Acc_avg:55.25% Training_loss_avg:0.685924
Epoch:1 Step:1752 Training_loss:0.676160, Acc_avg:55.25% Training_loss_avg:0.685786
Epoch:1 Step:1760 Training_loss:0.629511, Acc_avg:56.25% Training_loss_avg:0.683498
Epoch:1 Step:1768 Training_loss:0.733442, Acc_avg:55.50% Training_loss_avg:0.685010
Epoch:1 Step:1776 Training_loss:0.642859, Acc_avg:55.25% Training_loss_avg:0.684648
Epoch:1 Step:1784 Training_loss:0.620058, Acc_avg:55.50% Training_loss_avg:0.684262
Epoch:1 Step:1792 Training_loss:0.620403, Acc_avg:55.25% Training_loss_avg:0.684222
Epoch:1 Step:1800 Training_loss:0.655260, Acc_avg:55.25% Training_loss_avg:0.683805
Epoch:1 Step:1808 Training_loss:0.792162, Acc_avg:55.00% Training_loss_avg:0.685126
Epoch:1 Step:1816 Training_loss:0.708380, Acc_avg:54.75% Training_loss_avg:0.685918
Epoch:1 Step:1824 Training_loss:0.624424, Acc_avg:55.00% Training_loss_avg:0.685035
Epoch:1 Step:1832 Tr

52it [00:07,  6.61it/s]


Epoch:1 Step:1984 Val_loss:0.667725, Val_Acc_avg:52.00%
Epoch:1 Step:1992 Training_loss:0.685659, Acc_avg:55.50% Training_loss_avg:0.689493
Epoch:1 Step:2000 Training_loss:0.650000, Acc_avg:56.50% Training_loss_avg:0.687723
Epoch:1 Step:2008 Training_loss:0.649997, Acc_avg:56.75% Training_loss_avg:0.686768
Epoch:1 Step:2016 Training_loss:0.617024, Acc_avg:57.50% Training_loss_avg:0.685937
Epoch:1 Step:2024 Training_loss:0.722295, Acc_avg:57.50% Training_loss_avg:0.686061
Epoch:1 Step:2032 Training_loss:0.675009, Acc_avg:57.00% Training_loss_avg:0.685821
Epoch:1 Step:2040 Training_loss:0.597217, Acc_avg:57.75% Training_loss_avg:0.683059
Epoch:1 Step:2048 Training_loss:0.644274, Acc_avg:57.75% Training_loss_avg:0.682840
Epoch:1 Step:2056 Training_loss:0.680858, Acc_avg:57.75% Training_loss_avg:0.682809
Epoch:1 Step:2064 Training_loss:0.680312, Acc_avg:58.50% Training_loss_avg:0.682786
Epoch:1 Step:2072 Training_loss:0.734847, Acc_avg:58.50% Training_loss_avg:0.683184
Epoch:1 Step:2080 Tr

52it [00:07,  6.61it/s]


Epoch:1 Step:2232 Val_loss:0.672774, Val_Acc_avg:58.00%
Epoch:1 Step:2240 Training_loss:0.659909, Acc_avg:61.75% Training_loss_avg:0.670773
Epoch:1 Step:2248 Training_loss:0.599755, Acc_avg:62.50% Training_loss_avg:0.668343
Epoch:1 Step:2256 Training_loss:0.630013, Acc_avg:63.25% Training_loss_avg:0.664852
Epoch:1 Step:2264 Training_loss:0.606963, Acc_avg:64.00% Training_loss_avg:0.661772
Epoch:1 Step:2272 Training_loss:0.740713, Acc_avg:64.25% Training_loss_avg:0.662041
Epoch:1 Step:2280 Training_loss:0.515835, Acc_avg:64.75% Training_loss_avg:0.658806
Epoch:1 Step:2288 Training_loss:0.596418, Acc_avg:65.25% Training_loss_avg:0.657234
Epoch:1 Step:2296 Training_loss:0.615792, Acc_avg:65.25% Training_loss_avg:0.657090
Epoch:1 Step:2304 Training_loss:0.628179, Acc_avg:65.25% Training_loss_avg:0.655565
Epoch:1 Step:2312 Training_loss:0.691097, Acc_avg:65.50% Training_loss_avg:0.655598
Epoch:1 Step:2320 Training_loss:0.567705, Acc_avg:65.75% Training_loss_avg:0.653945
Epoch:1 Step:2328 Tr

52it [00:07,  6.61it/s]


Epoch:1 Step:2480 Val_loss:0.648841, Val_Acc_avg:50.00%
Epoch:1 Step:2488 Training_loss:0.610851, Acc_avg:66.75% Training_loss_avg:0.648729
Epoch:1 Step:2496 Training_loss:0.701685, Acc_avg:66.50% Training_loss_avg:0.649868
Epoch:1 Step:2504 Training_loss:0.535876, Acc_avg:67.25% Training_loss_avg:0.646148
Epoch:1 Step:2512 Training_loss:0.647991, Acc_avg:67.25% Training_loss_avg:0.647134
Epoch:1 Step:2520 Training_loss:0.655096, Acc_avg:67.00% Training_loss_avg:0.647274
Epoch:1 Step:2528 Training_loss:0.669793, Acc_avg:66.50% Training_loss_avg:0.648917
Epoch:1 Step:2536 Training_loss:0.827365, Acc_avg:66.00% Training_loss_avg:0.653315
Epoch:1 Step:2544 Training_loss:0.561275, Acc_avg:66.25% Training_loss_avg:0.650946
Epoch:1 Step:2552 Training_loss:0.781637, Acc_avg:65.50% Training_loss_avg:0.652550
Epoch:1 Step:2560 Training_loss:0.560919, Acc_avg:66.25% Training_loss_avg:0.648838
Epoch:1 Step:2568 Training_loss:0.491045, Acc_avg:66.75% Training_loss_avg:0.645646
Epoch:1 Step:2576 Tr

52it [00:07,  6.61it/s]


Epoch:1 Step:2728 Val_loss:0.642229, Val_Acc_avg:52.00%
Epoch:1 Step:2736 Training_loss:0.676563, Acc_avg:61.75% Training_loss_avg:0.664241
Epoch:1 Step:2744 Training_loss:0.706411, Acc_avg:61.50% Training_loss_avg:0.664930
Epoch:1 Step:2752 Training_loss:0.639635, Acc_avg:62.00% Training_loss_avg:0.660465
Epoch:1 Step:2760 Training_loss:0.628464, Acc_avg:62.50% Training_loss_avg:0.657034
Epoch:1 Step:2768 Training_loss:0.697112, Acc_avg:61.50% Training_loss_avg:0.659670
Epoch:1 Step:2776 Training_loss:0.645525, Acc_avg:61.25% Training_loss_avg:0.659744
Epoch:1 Step:2784 Training_loss:0.635910, Acc_avg:61.50% Training_loss_avg:0.660400
Epoch:1 Step:2792 Training_loss:0.712253, Acc_avg:61.25% Training_loss_avg:0.663202
Epoch:1 Step:2800 Training_loss:0.573110, Acc_avg:61.75% Training_loss_avg:0.661456
Epoch:1 Step:2808 Training_loss:0.764360, Acc_avg:61.00% Training_loss_avg:0.664802
Epoch:1 Step:2816 Training_loss:0.653924, Acc_avg:61.25% Training_loss_avg:0.664727
Epoch:1 Step:2824 Tr

52it [00:07,  6.61it/s]


Epoch:1 Step:2976 Val_loss:0.659207, Val_Acc_avg:56.00%
Epoch:1 Step:2984 Training_loss:0.646820, Acc_avg:59.50% Training_loss_avg:0.668498
Epoch:1 Step:2992 Training_loss:0.677055, Acc_avg:59.25% Training_loss_avg:0.671197
Epoch:1 Step:3000 Training_loss:0.611519, Acc_avg:59.50% Training_loss_avg:0.670708
Epoch:1 Step:3008 Training_loss:0.598294, Acc_avg:60.75% Training_loss_avg:0.664808
Epoch:1 Step:3016 Training_loss:0.579135, Acc_avg:61.00% Training_loss_avg:0.662148
Epoch:1 Step:3024 Training_loss:0.715320, Acc_avg:61.00% Training_loss_avg:0.661416
Epoch:1 Step:3032 Training_loss:0.494968, Acc_avg:61.25% Training_loss_avg:0.658331
Epoch:1 Step:3040 Training_loss:0.695147, Acc_avg:61.50% Training_loss_avg:0.656277
Epoch:1 Step:3048 Training_loss:0.779709, Acc_avg:60.50% Training_loss_avg:0.658459
Epoch:1 Step:3056 Training_loss:0.696456, Acc_avg:60.50% Training_loss_avg:0.660134
Epoch:1 Step:3064 Training_loss:0.767846, Acc_avg:59.75% Training_loss_avg:0.664245
Epoch:1 Step:3072 Tr

52it [00:07,  6.61it/s]


Epoch:1 Step:3224 Val_loss:0.640089, Val_Acc_avg:54.00%
Epoch:1 Step:3232 Training_loss:0.633338, Acc_avg:58.75% Training_loss_avg:0.665787
Epoch:1 Step:3240 Training_loss:0.764401, Acc_avg:58.25% Training_loss_avg:0.668624
Epoch:1 Step:3248 Training_loss:0.609972, Acc_avg:59.00% Training_loss_avg:0.663926
Epoch:1 Step:3256 Training_loss:0.568921, Acc_avg:59.25% Training_loss_avg:0.661599
Epoch:1 Step:3264 Training_loss:0.698647, Acc_avg:58.75% Training_loss_avg:0.663338
Epoch:1 Step:3272 Training_loss:0.676504, Acc_avg:58.25% Training_loss_avg:0.665774
Epoch:1 Step:3280 Training_loss:0.642899, Acc_avg:58.25% Training_loss_avg:0.666115
Epoch:1 Step:3288 Training_loss:0.603202, Acc_avg:58.50% Training_loss_avg:0.664589
Epoch:1 Step:3296 Training_loss:0.653811, Acc_avg:59.00% Training_loss_avg:0.663963
Epoch:1 Step:3304 Training_loss:0.672976, Acc_avg:59.00% Training_loss_avg:0.663501
Epoch:1 Step:3312 Training_loss:0.625226, Acc_avg:58.75% Training_loss_avg:0.664664
Epoch:1 Step:3320 Tr

52it [00:07,  6.61it/s]


Epoch:2 Step:0 Val_loss:0.646330, Val_Acc_avg:50.00%
Epoch:2 Step:8 Training_loss:0.606139, Acc_avg:59.08% Training_loss_avg:0.666150
Epoch:2 Step:16 Training_loss:0.685687, Acc_avg:59.08% Training_loss_avg:0.665689
Epoch:2 Step:24 Training_loss:0.699266, Acc_avg:58.83% Training_loss_avg:0.664856
Epoch:2 Step:32 Training_loss:0.704973, Acc_avg:58.33% Training_loss_avg:0.665033
Epoch:2 Step:40 Training_loss:0.703702, Acc_avg:57.58% Training_loss_avg:0.667605
Epoch:2 Step:48 Training_loss:0.679698, Acc_avg:58.08% Training_loss_avg:0.668262
Epoch:2 Step:56 Training_loss:0.596181, Acc_avg:58.08% Training_loss_avg:0.666645
Epoch:2 Step:64 Training_loss:0.533846, Acc_avg:58.08% Training_loss_avg:0.665091
Epoch:2 Step:72 Training_loss:0.676501, Acc_avg:57.33% Training_loss_avg:0.666656
Epoch:2 Step:80 Training_loss:0.721132, Acc_avg:56.58% Training_loss_avg:0.669496
Epoch:2 Step:88 Training_loss:0.649562, Acc_avg:57.08% Training_loss_avg:0.668180
Epoch:2 Step:96 Training_loss:0.652630, Acc_av

52it [00:07,  6.61it/s]


Epoch:2 Step:248 Val_loss:0.635136, Val_Acc_avg:52.00%
Epoch:2 Step:256 Training_loss:0.791416, Acc_avg:58.83% Training_loss_avg:0.655939
Epoch:2 Step:264 Training_loss:0.751772, Acc_avg:58.08% Training_loss_avg:0.659619
Epoch:2 Step:272 Training_loss:0.680163, Acc_avg:57.58% Training_loss_avg:0.661115
Epoch:2 Step:280 Training_loss:0.594352, Acc_avg:58.08% Training_loss_avg:0.658415
Epoch:2 Step:288 Training_loss:0.712395, Acc_avg:57.83% Training_loss_avg:0.658820
Epoch:2 Step:296 Training_loss:0.777111, Acc_avg:57.58% Training_loss_avg:0.661695
Epoch:2 Step:304 Training_loss:0.761092, Acc_avg:57.58% Training_loss_avg:0.661629
Epoch:2 Step:312 Training_loss:0.638748, Acc_avg:57.33% Training_loss_avg:0.662205
Epoch:2 Step:320 Training_loss:0.572772, Acc_avg:58.08% Training_loss_avg:0.662282
Epoch:2 Step:328 Training_loss:0.707280, Acc_avg:58.58% Training_loss_avg:0.662454
Epoch:2 Step:336 Training_loss:0.647070, Acc_avg:58.33% Training_loss_avg:0.661866
Epoch:2 Step:344 Training_loss:0

52it [00:07,  6.60it/s]


Epoch:2 Step:496 Val_loss:0.628500, Val_Acc_avg:52.00%
Epoch:2 Step:504 Training_loss:0.579105, Acc_avg:58.50% Training_loss_avg:0.661879
Epoch:2 Step:512 Training_loss:0.824972, Acc_avg:57.75% Training_loss_avg:0.664999
Epoch:2 Step:520 Training_loss:0.543582, Acc_avg:58.50% Training_loss_avg:0.662520
Epoch:2 Step:528 Training_loss:0.596869, Acc_avg:58.75% Training_loss_avg:0.662724
Epoch:2 Step:536 Training_loss:0.514909, Acc_avg:58.25% Training_loss_avg:0.663584
Epoch:2 Step:544 Training_loss:0.659429, Acc_avg:58.50% Training_loss_avg:0.664347
Epoch:2 Step:552 Training_loss:1.209558, Acc_avg:57.50% Training_loss_avg:0.677285
Epoch:2 Step:560 Training_loss:0.802162, Acc_avg:57.00% Training_loss_avg:0.680098
Epoch:2 Step:568 Training_loss:0.730233, Acc_avg:57.50% Training_loss_avg:0.681391
Epoch:2 Step:576 Training_loss:0.849149, Acc_avg:57.00% Training_loss_avg:0.684501
Epoch:2 Step:584 Training_loss:0.616155, Acc_avg:57.50% Training_loss_avg:0.682064
Epoch:2 Step:592 Training_loss:0

52it [00:07,  6.61it/s]


Epoch:2 Step:744 Val_loss:0.667815, Val_Acc_avg:56.00%
Epoch:2 Step:752 Training_loss:0.762849, Acc_avg:58.75% Training_loss_avg:0.687573
Epoch:2 Step:760 Training_loss:0.625348, Acc_avg:58.75% Training_loss_avg:0.689775
Epoch:2 Step:768 Training_loss:0.663606, Acc_avg:58.00% Training_loss_avg:0.694565
Epoch:2 Step:776 Training_loss:0.578216, Acc_avg:58.00% Training_loss_avg:0.691991
Epoch:2 Step:784 Training_loss:0.711525, Acc_avg:57.25% Training_loss_avg:0.694474
Epoch:2 Step:792 Training_loss:0.673459, Acc_avg:57.00% Training_loss_avg:0.695700
Epoch:2 Step:800 Training_loss:0.696092, Acc_avg:56.75% Training_loss_avg:0.697450
Epoch:2 Step:808 Training_loss:0.557577, Acc_avg:57.50% Training_loss_avg:0.693814
Epoch:2 Step:816 Training_loss:0.648276, Acc_avg:57.75% Training_loss_avg:0.690882
Epoch:2 Step:824 Training_loss:0.863372, Acc_avg:56.75% Training_loss_avg:0.697126
Epoch:2 Step:832 Training_loss:0.704982, Acc_avg:57.50% Training_loss_avg:0.693524
Epoch:2 Step:840 Training_loss:0

52it [00:07,  6.60it/s]


Epoch:2 Step:992 Val_loss:0.680071, Val_Acc_avg:56.00%
Epoch:2 Step:1000 Training_loss:0.583372, Acc_avg:59.75% Training_loss_avg:0.679158
Epoch:2 Step:1008 Training_loss:0.690821, Acc_avg:60.25% Training_loss_avg:0.678013
Epoch:2 Step:1016 Training_loss:0.756450, Acc_avg:59.75% Training_loss_avg:0.680529
Epoch:2 Step:1024 Training_loss:0.693795, Acc_avg:59.50% Training_loss_avg:0.680284
Epoch:2 Step:1032 Training_loss:0.660063, Acc_avg:59.50% Training_loss_avg:0.680063
Epoch:2 Step:1040 Training_loss:0.753308, Acc_avg:58.75% Training_loss_avg:0.684976
Epoch:2 Step:1048 Training_loss:0.671066, Acc_avg:58.50% Training_loss_avg:0.686523
Epoch:2 Step:1056 Training_loss:0.637167, Acc_avg:58.75% Training_loss_avg:0.684861
Epoch:2 Step:1064 Training_loss:0.674209, Acc_avg:58.50% Training_loss_avg:0.684329
Epoch:2 Step:1072 Training_loss:0.569909, Acc_avg:59.00% Training_loss_avg:0.680231
Epoch:2 Step:1080 Training_loss:0.664793, Acc_avg:59.00% Training_loss_avg:0.677928
Epoch:2 Step:1088 Tra

52it [00:07,  6.60it/s]


Epoch:2 Step:1240 Val_loss:0.651153, Val_Acc_avg:54.00%
Epoch:2 Step:1248 Training_loss:0.659858, Acc_avg:60.50% Training_loss_avg:0.667377
Epoch:2 Step:1256 Training_loss:0.769823, Acc_avg:60.25% Training_loss_avg:0.667892
Epoch:2 Step:1264 Training_loss:0.657711, Acc_avg:60.25% Training_loss_avg:0.668507
Epoch:2 Step:1272 Training_loss:0.663900, Acc_avg:60.50% Training_loss_avg:0.668521
Epoch:2 Step:1280 Training_loss:0.618730, Acc_avg:60.50% Training_loss_avg:0.668033
Epoch:2 Step:1288 Training_loss:0.616002, Acc_avg:61.50% Training_loss_avg:0.665889
Epoch:2 Step:1296 Training_loss:0.604299, Acc_avg:61.75% Training_loss_avg:0.663978
Epoch:2 Step:1304 Training_loss:0.685902, Acc_avg:62.00% Training_loss_avg:0.662349
Epoch:2 Step:1312 Training_loss:0.707822, Acc_avg:61.75% Training_loss_avg:0.664256
Epoch:2 Step:1320 Training_loss:0.609843, Acc_avg:62.00% Training_loss_avg:0.663882
Epoch:2 Step:1328 Training_loss:0.611005, Acc_avg:62.00% Training_loss_avg:0.664483
Epoch:2 Step:1336 Tr

52it [00:07,  6.61it/s]


Epoch:2 Step:1488 Val_loss:0.652247, Val_Acc_avg:56.00%
Epoch:2 Step:1496 Training_loss:0.657858, Acc_avg:61.50% Training_loss_avg:0.667494
Epoch:2 Step:1504 Training_loss:0.631695, Acc_avg:62.25% Training_loss_avg:0.664027
Epoch:2 Step:1512 Training_loss:0.541436, Acc_avg:62.75% Training_loss_avg:0.662181
Epoch:2 Step:1520 Training_loss:0.746618, Acc_avg:62.75% Training_loss_avg:0.663219
Epoch:2 Step:1528 Training_loss:0.860527, Acc_avg:62.00% Training_loss_avg:0.667026
Epoch:2 Step:1536 Training_loss:0.682070, Acc_avg:62.00% Training_loss_avg:0.668163
Epoch:2 Step:1544 Training_loss:0.765043, Acc_avg:61.25% Training_loss_avg:0.671878
Epoch:2 Step:1552 Training_loss:0.645010, Acc_avg:61.25% Training_loss_avg:0.671916
Epoch:2 Step:1560 Training_loss:0.790121, Acc_avg:60.75% Training_loss_avg:0.673602
Epoch:2 Step:1568 Training_loss:0.688406, Acc_avg:60.50% Training_loss_avg:0.676025
Epoch:2 Step:1576 Training_loss:0.784818, Acc_avg:59.25% Training_loss_avg:0.679736
Epoch:2 Step:1584 Tr

52it [00:07,  6.60it/s]


Epoch:2 Step:1736 Val_loss:0.645877, Val_Acc_avg:54.00%
Epoch:2 Step:1744 Training_loss:0.707300, Acc_avg:58.75% Training_loss_avg:0.678032
Epoch:2 Step:1752 Training_loss:0.597709, Acc_avg:59.25% Training_loss_avg:0.677554
Epoch:2 Step:1760 Training_loss:0.662268, Acc_avg:59.00% Training_loss_avg:0.674844
Epoch:2 Step:1768 Training_loss:0.667305, Acc_avg:59.75% Training_loss_avg:0.672225
Epoch:2 Step:1776 Training_loss:0.612301, Acc_avg:60.50% Training_loss_avg:0.668767
Epoch:2 Step:1784 Training_loss:0.652127, Acc_avg:60.50% Training_loss_avg:0.666975
Epoch:2 Step:1792 Training_loss:0.646349, Acc_avg:61.00% Training_loss_avg:0.664517
Epoch:2 Step:1800 Training_loss:0.593775, Acc_avg:61.75% Training_loss_avg:0.659626
Epoch:2 Step:1808 Training_loss:0.688465, Acc_avg:61.75% Training_loss_avg:0.661150
Epoch:2 Step:1816 Training_loss:0.664977, Acc_avg:61.50% Training_loss_avg:0.661359
Epoch:2 Step:1824 Training_loss:0.662660, Acc_avg:61.75% Training_loss_avg:0.659219
Epoch:2 Step:1832 Tr

52it [00:07,  6.60it/s]


Epoch:2 Step:1984 Val_loss:0.633651, Val_Acc_avg:50.00%
Epoch:2 Step:1992 Training_loss:0.757788, Acc_avg:60.00% Training_loss_avg:0.665811
Epoch:2 Step:2000 Training_loss:0.637631, Acc_avg:59.50% Training_loss_avg:0.666667
Epoch:2 Step:2008 Training_loss:0.594251, Acc_avg:60.00% Training_loss_avg:0.665007
Epoch:2 Step:2016 Training_loss:0.741823, Acc_avg:60.00% Training_loss_avg:0.666376
Epoch:2 Step:2024 Training_loss:0.653440, Acc_avg:59.75% Training_loss_avg:0.667082
Epoch:2 Step:2032 Training_loss:0.644704, Acc_avg:59.75% Training_loss_avg:0.666650
Epoch:2 Step:2040 Training_loss:0.584264, Acc_avg:60.00% Training_loss_avg:0.665498
Epoch:2 Step:2048 Training_loss:0.733714, Acc_avg:59.00% Training_loss_avg:0.668308
Epoch:2 Step:2056 Training_loss:0.697439, Acc_avg:58.75% Training_loss_avg:0.669311
Epoch:2 Step:2064 Training_loss:0.649985, Acc_avg:58.50% Training_loss_avg:0.670444
Epoch:2 Step:2072 Training_loss:0.701366, Acc_avg:58.75% Training_loss_avg:0.670372
Epoch:2 Step:2080 Tr

52it [00:07,  6.61it/s]


Epoch:2 Step:2232 Val_loss:0.638312, Val_Acc_avg:48.00%
Epoch:2 Step:2240 Training_loss:0.586903, Acc_avg:61.50% Training_loss_avg:0.654827
Epoch:2 Step:2248 Training_loss:0.592277, Acc_avg:62.25% Training_loss_avg:0.651231
Epoch:2 Step:2256 Training_loss:0.702997, Acc_avg:61.50% Training_loss_avg:0.655183
Epoch:2 Step:2264 Training_loss:0.768868, Acc_avg:61.00% Training_loss_avg:0.655514
Epoch:2 Step:2272 Training_loss:0.543520, Acc_avg:61.00% Training_loss_avg:0.654623
Epoch:2 Step:2280 Training_loss:0.662341, Acc_avg:60.50% Training_loss_avg:0.655358
Epoch:2 Step:2288 Training_loss:0.660895, Acc_avg:60.75% Training_loss_avg:0.653670
Epoch:2 Step:2296 Training_loss:0.742316, Acc_avg:59.75% Training_loss_avg:0.658685
Epoch:2 Step:2304 Training_loss:0.522220, Acc_avg:60.50% Training_loss_avg:0.654672
Epoch:2 Step:2312 Training_loss:0.688167, Acc_avg:61.00% Training_loss_avg:0.654655
Epoch:2 Step:2320 Training_loss:0.656019, Acc_avg:60.75% Training_loss_avg:0.654525
Epoch:2 Step:2328 Tr

52it [00:07,  6.60it/s]


Epoch:2 Step:2480 Val_loss:0.643503, Val_Acc_avg:58.00%
Epoch:2 Step:2488 Training_loss:0.621353, Acc_avg:64.50% Training_loss_avg:0.624964
Epoch:2 Step:2496 Training_loss:0.909434, Acc_avg:63.50% Training_loss_avg:0.628525
Epoch:2 Step:2504 Training_loss:0.556289, Acc_avg:64.00% Training_loss_avg:0.626802
Epoch:2 Step:2512 Training_loss:0.640239, Acc_avg:63.25% Training_loss_avg:0.630364
Epoch:2 Step:2520 Training_loss:0.607607, Acc_avg:63.75% Training_loss_avg:0.630174
Epoch:2 Step:2528 Training_loss:0.588944, Acc_avg:63.75% Training_loss_avg:0.628804
Epoch:2 Step:2536 Training_loss:0.719983, Acc_avg:63.25% Training_loss_avg:0.631955
Epoch:2 Step:2544 Training_loss:0.510784, Acc_avg:64.00% Training_loss_avg:0.628854
Epoch:2 Step:2552 Training_loss:0.759290, Acc_avg:64.25% Training_loss_avg:0.629800
Epoch:2 Step:2560 Training_loss:0.557830, Acc_avg:64.25% Training_loss_avg:0.628971
Epoch:2 Step:2568 Training_loss:0.715925, Acc_avg:63.75% Training_loss_avg:0.630890
Epoch:2 Step:2576 Tr

52it [00:07,  6.60it/s]


Epoch:2 Step:2728 Val_loss:0.624603, Val_Acc_avg:56.00%
Epoch:2 Step:2736 Training_loss:0.570607, Acc_avg:63.75% Training_loss_avg:0.628983
Epoch:2 Step:2744 Training_loss:0.631697, Acc_avg:63.75% Training_loss_avg:0.628174
Epoch:2 Step:2752 Training_loss:0.613559, Acc_avg:63.25% Training_loss_avg:0.629776
Epoch:2 Step:2760 Training_loss:0.561781, Acc_avg:63.00% Training_loss_avg:0.629073
Epoch:2 Step:2768 Training_loss:0.791382, Acc_avg:61.75% Training_loss_avg:0.633418
Epoch:2 Step:2776 Training_loss:0.854251, Acc_avg:61.75% Training_loss_avg:0.636206
Epoch:2 Step:2784 Training_loss:0.736773, Acc_avg:61.50% Training_loss_avg:0.635982
Epoch:2 Step:2792 Training_loss:0.589467, Acc_avg:62.25% Training_loss_avg:0.633057
Epoch:2 Step:2800 Training_loss:0.916176, Acc_avg:62.25% Training_loss_avg:0.636497
Epoch:2 Step:2808 Training_loss:0.607635, Acc_avg:61.75% Training_loss_avg:0.639386
Epoch:2 Step:2816 Training_loss:0.858076, Acc_avg:61.00% Training_loss_avg:0.646358
Epoch:2 Step:2824 Tr

52it [00:07,  6.60it/s]


Epoch:2 Step:2976 Val_loss:0.635876, Val_Acc_avg:54.00%
Epoch:2 Step:2984 Training_loss:0.629270, Acc_avg:64.50% Training_loss_avg:0.633432
Epoch:2 Step:2992 Training_loss:0.599937, Acc_avg:64.25% Training_loss_avg:0.633983
Epoch:2 Step:3000 Training_loss:0.658366, Acc_avg:63.50% Training_loss_avg:0.639061
Epoch:2 Step:3008 Training_loss:0.610110, Acc_avg:64.25% Training_loss_avg:0.635607
Epoch:2 Step:3016 Training_loss:0.589203, Acc_avg:64.25% Training_loss_avg:0.638049
Epoch:2 Step:3024 Training_loss:0.650487, Acc_avg:63.75% Training_loss_avg:0.638073
Epoch:2 Step:3032 Training_loss:0.533401, Acc_avg:64.00% Training_loss_avg:0.636280
Epoch:2 Step:3040 Training_loss:0.705657, Acc_avg:63.75% Training_loss_avg:0.636629
Epoch:2 Step:3048 Training_loss:0.593517, Acc_avg:63.50% Training_loss_avg:0.637711
Epoch:2 Step:3056 Training_loss:0.742838, Acc_avg:63.50% Training_loss_avg:0.640118
Epoch:2 Step:3064 Training_loss:0.666222, Acc_avg:63.50% Training_loss_avg:0.642356
Epoch:2 Step:3072 Tr

52it [00:07,  6.60it/s]


Epoch:2 Step:3224 Val_loss:0.633029, Val_Acc_avg:54.00%
Epoch:2 Step:3232 Training_loss:0.500621, Acc_avg:66.25% Training_loss_avg:0.618147
Epoch:2 Step:3240 Training_loss:0.652019, Acc_avg:66.00% Training_loss_avg:0.618593
Epoch:2 Step:3248 Training_loss:0.613607, Acc_avg:66.00% Training_loss_avg:0.618993
Epoch:2 Step:3256 Training_loss:0.676701, Acc_avg:65.50% Training_loss_avg:0.620231
Epoch:2 Step:3264 Training_loss:0.550420, Acc_avg:65.75% Training_loss_avg:0.616455
Epoch:2 Step:3272 Training_loss:0.615841, Acc_avg:65.00% Training_loss_avg:0.616560
Epoch:2 Step:3280 Training_loss:0.490168, Acc_avg:65.25% Training_loss_avg:0.614957
Epoch:2 Step:3288 Training_loss:0.786148, Acc_avg:64.50% Training_loss_avg:0.620100
Epoch:2 Step:3296 Training_loss:0.738339, Acc_avg:63.50% Training_loss_avg:0.624963
Epoch:2 Step:3304 Training_loss:0.558152, Acc_avg:63.75% Training_loss_avg:0.624264
Epoch:2 Step:3312 Training_loss:0.722002, Acc_avg:63.50% Training_loss_avg:0.625635
Epoch:2 Step:3320 Tr

52it [00:07,  6.60it/s]


Epoch:3 Step:0 Val_loss:0.626198, Val_Acc_avg:54.00%
Epoch:3 Step:8 Training_loss:0.476432, Acc_avg:63.83% Training_loss_avg:0.625927
Epoch:3 Step:16 Training_loss:0.797997, Acc_avg:63.33% Training_loss_avg:0.628953
Epoch:3 Step:24 Training_loss:0.565958, Acc_avg:64.08% Training_loss_avg:0.625366
Epoch:3 Step:32 Training_loss:0.496909, Acc_avg:64.08% Training_loss_avg:0.623178
Epoch:3 Step:40 Training_loss:0.576048, Acc_avg:64.08% Training_loss_avg:0.621083
Epoch:3 Step:48 Training_loss:0.721700, Acc_avg:63.33% Training_loss_avg:0.622932
Epoch:3 Step:56 Training_loss:0.743952, Acc_avg:62.83% Training_loss_avg:0.625812
Epoch:3 Step:64 Training_loss:0.476105, Acc_avg:63.58% Training_loss_avg:0.622167
Epoch:3 Step:72 Training_loss:0.597663, Acc_avg:63.33% Training_loss_avg:0.621918
Epoch:3 Step:80 Training_loss:0.676724, Acc_avg:63.08% Training_loss_avg:0.623669
Epoch:3 Step:88 Training_loss:0.775710, Acc_avg:63.33% Training_loss_avg:0.626173
Epoch:3 Step:96 Training_loss:0.834795, Acc_av

52it [00:07,  6.61it/s]


Epoch:3 Step:248 Val_loss:0.646170, Val_Acc_avg:50.00%
Epoch:3 Step:256 Training_loss:0.598997, Acc_avg:58.58% Training_loss_avg:0.648548
Epoch:3 Step:264 Training_loss:0.591485, Acc_avg:59.58% Training_loss_avg:0.644398
Epoch:3 Step:272 Training_loss:0.631791, Acc_avg:59.08% Training_loss_avg:0.647270
Epoch:3 Step:280 Training_loss:0.588468, Acc_avg:59.08% Training_loss_avg:0.648109
Epoch:3 Step:288 Training_loss:0.771706, Acc_avg:58.33% Training_loss_avg:0.652568
Epoch:3 Step:296 Training_loss:0.630083, Acc_avg:57.33% Training_loss_avg:0.655158
Epoch:3 Step:304 Training_loss:0.773685, Acc_avg:56.83% Training_loss_avg:0.657591
Epoch:3 Step:312 Training_loss:0.626425, Acc_avg:57.08% Training_loss_avg:0.657847
Epoch:3 Step:320 Training_loss:0.782333, Acc_avg:56.33% Training_loss_avg:0.659960
Epoch:3 Step:328 Training_loss:0.734087, Acc_avg:56.08% Training_loss_avg:0.663633
Epoch:3 Step:336 Training_loss:0.613187, Acc_avg:56.08% Training_loss_avg:0.663580
Epoch:3 Step:344 Training_loss:0

52it [00:07,  6.60it/s]


Epoch:3 Step:496 Val_loss:0.642844, Val_Acc_avg:56.00%
Epoch:3 Step:504 Training_loss:0.565521, Acc_avg:60.00% Training_loss_avg:0.642099
Epoch:3 Step:512 Training_loss:0.750837, Acc_avg:60.00% Training_loss_avg:0.645187
Epoch:3 Step:520 Training_loss:0.703064, Acc_avg:60.50% Training_loss_avg:0.640537
Epoch:3 Step:528 Training_loss:0.722410, Acc_avg:60.25% Training_loss_avg:0.642022
Epoch:3 Step:536 Training_loss:0.561985, Acc_avg:60.50% Training_loss_avg:0.641136
Epoch:3 Step:544 Training_loss:0.564742, Acc_avg:60.75% Training_loss_avg:0.640603
Epoch:3 Step:552 Training_loss:0.835004, Acc_avg:60.50% Training_loss_avg:0.642323
Epoch:3 Step:560 Training_loss:0.562506, Acc_avg:61.25% Training_loss_avg:0.640476
Epoch:3 Step:568 Training_loss:0.737164, Acc_avg:61.75% Training_loss_avg:0.641119
Epoch:3 Step:576 Training_loss:0.520992, Acc_avg:61.75% Training_loss_avg:0.642180
Epoch:3 Step:584 Training_loss:0.741829, Acc_avg:61.00% Training_loss_avg:0.645810
Epoch:3 Step:592 Training_loss:0

52it [00:07,  6.61it/s]


Epoch:3 Step:744 Val_loss:0.619803, Val_Acc_avg:54.00%
Epoch:3 Step:752 Training_loss:0.594890, Acc_avg:64.75% Training_loss_avg:0.628993
Epoch:3 Step:760 Training_loss:0.580084, Acc_avg:64.75% Training_loss_avg:0.628562
Epoch:3 Step:768 Training_loss:0.596174, Acc_avg:65.00% Training_loss_avg:0.629097
Epoch:3 Step:776 Training_loss:0.729156, Acc_avg:64.25% Training_loss_avg:0.633308
Epoch:3 Step:784 Training_loss:0.389440, Acc_avg:65.00% Training_loss_avg:0.627828
Epoch:3 Step:792 Training_loss:0.800825, Acc_avg:64.25% Training_loss_avg:0.631803
Epoch:3 Step:800 Training_loss:0.612747, Acc_avg:64.50% Training_loss_avg:0.630247
Epoch:3 Step:808 Training_loss:0.573622, Acc_avg:64.50% Training_loss_avg:0.630774
Epoch:3 Step:816 Training_loss:0.455870, Acc_avg:65.25% Training_loss_avg:0.626039
Epoch:3 Step:824 Training_loss:0.692368, Acc_avg:64.75% Training_loss_avg:0.627532
Epoch:3 Step:832 Training_loss:0.715134, Acc_avg:64.25% Training_loss_avg:0.629459
Epoch:3 Step:840 Training_loss:0

52it [00:07,  6.61it/s]


Epoch:3 Step:992 Val_loss:0.613943, Val_Acc_avg:54.00%
Epoch:3 Step:1000 Training_loss:0.479336, Acc_avg:64.50% Training_loss_avg:0.622015
Epoch:3 Step:1008 Training_loss:0.458805, Acc_avg:64.50% Training_loss_avg:0.619542
Epoch:3 Step:1016 Training_loss:0.540052, Acc_avg:64.50% Training_loss_avg:0.620060
Epoch:3 Step:1024 Training_loss:0.613328, Acc_avg:64.75% Training_loss_avg:0.620098
Epoch:3 Step:1032 Training_loss:0.744008, Acc_avg:64.25% Training_loss_avg:0.620465
Epoch:3 Step:1040 Training_loss:0.579515, Acc_avg:64.75% Training_loss_avg:0.617178
Epoch:3 Step:1048 Training_loss:0.616740, Acc_avg:65.25% Training_loss_avg:0.615593
Epoch:3 Step:1056 Training_loss:0.665769, Acc_avg:65.25% Training_loss_avg:0.617622
Epoch:3 Step:1064 Training_loss:0.569150, Acc_avg:66.00% Training_loss_avg:0.615655
Epoch:3 Step:1072 Training_loss:0.830779, Acc_avg:65.00% Training_loss_avg:0.621870
Epoch:3 Step:1080 Training_loss:0.414264, Acc_avg:65.50% Training_loss_avg:0.617692
Epoch:3 Step:1088 Tra

52it [00:07,  6.60it/s]


Epoch:3 Step:1240 Val_loss:0.611696, Val_Acc_avg:54.00%
Epoch:3 Step:1248 Training_loss:0.551410, Acc_avg:66.00% Training_loss_avg:0.625344
Epoch:3 Step:1256 Training_loss:0.623605, Acc_avg:65.50% Training_loss_avg:0.629837
Epoch:3 Step:1264 Training_loss:0.853509, Acc_avg:64.75% Training_loss_avg:0.635711
Epoch:3 Step:1272 Training_loss:0.716024, Acc_avg:64.50% Training_loss_avg:0.634229
Epoch:3 Step:1280 Training_loss:0.552973, Acc_avg:65.00% Training_loss_avg:0.633178
Epoch:3 Step:1288 Training_loss:0.591123, Acc_avg:65.00% Training_loss_avg:0.634728
Epoch:3 Step:1296 Training_loss:0.709353, Acc_avg:64.50% Training_loss_avg:0.633990
Epoch:3 Step:1304 Training_loss:0.635373, Acc_avg:64.25% Training_loss_avg:0.635401
Epoch:3 Step:1312 Training_loss:0.770284, Acc_avg:64.00% Training_loss_avg:0.637863
Epoch:3 Step:1320 Training_loss:0.737099, Acc_avg:63.75% Training_loss_avg:0.637500
Epoch:3 Step:1328 Training_loss:0.635163, Acc_avg:63.00% Training_loss_avg:0.642147
Epoch:3 Step:1336 Tr

52it [00:07,  6.60it/s]


Epoch:3 Step:1488 Val_loss:0.615168, Val_Acc_avg:58.00%
Epoch:3 Step:1496 Training_loss:0.654807, Acc_avg:61.50% Training_loss_avg:0.648904
Epoch:3 Step:1504 Training_loss:0.541311, Acc_avg:61.75% Training_loss_avg:0.646768
Epoch:3 Step:1512 Training_loss:0.738905, Acc_avg:62.00% Training_loss_avg:0.646884
Epoch:3 Step:1520 Training_loss:0.448414, Acc_avg:62.50% Training_loss_avg:0.642097
Epoch:3 Step:1528 Training_loss:0.726038, Acc_avg:62.50% Training_loss_avg:0.642059
Epoch:3 Step:1536 Training_loss:0.539235, Acc_avg:63.00% Training_loss_avg:0.638077
Epoch:3 Step:1544 Training_loss:0.651976, Acc_avg:62.75% Training_loss_avg:0.638968
Epoch:3 Step:1552 Training_loss:0.583199, Acc_avg:62.50% Training_loss_avg:0.641166
Epoch:3 Step:1560 Training_loss:0.687001, Acc_avg:62.75% Training_loss_avg:0.641262
Epoch:3 Step:1568 Training_loss:0.555695, Acc_avg:63.25% Training_loss_avg:0.638883
Epoch:3 Step:1576 Training_loss:0.456991, Acc_avg:63.25% Training_loss_avg:0.636792
Epoch:3 Step:1584 Tr

52it [00:07,  6.61it/s]


Epoch:3 Step:1736 Val_loss:0.613423, Val_Acc_avg:48.00%
Epoch:3 Step:1744 Training_loss:0.684429, Acc_avg:65.50% Training_loss_avg:0.635459
Epoch:3 Step:1752 Training_loss:0.652651, Acc_avg:66.00% Training_loss_avg:0.635586
Epoch:3 Step:1760 Training_loss:0.582884, Acc_avg:65.25% Training_loss_avg:0.639269
Epoch:3 Step:1768 Training_loss:0.585666, Acc_avg:65.50% Training_loss_avg:0.638004
Epoch:3 Step:1776 Training_loss:0.384484, Acc_avg:66.25% Training_loss_avg:0.631114
Epoch:3 Step:1784 Training_loss:0.582278, Acc_avg:66.00% Training_loss_avg:0.627711
Epoch:3 Step:1792 Training_loss:0.680785, Acc_avg:66.25% Training_loss_avg:0.630160
Epoch:3 Step:1800 Training_loss:0.581835, Acc_avg:66.25% Training_loss_avg:0.630176
Epoch:3 Step:1808 Training_loss:0.537241, Acc_avg:65.50% Training_loss_avg:0.630000
Epoch:3 Step:1816 Training_loss:0.657137, Acc_avg:65.75% Training_loss_avg:0.628596
Epoch:3 Step:1824 Training_loss:0.794261, Acc_avg:66.25% Training_loss_avg:0.629523
Epoch:3 Step:1832 Tr

52it [00:07,  6.61it/s]


Epoch:3 Step:1984 Val_loss:0.612712, Val_Acc_avg:54.00%
Epoch:3 Step:1992 Training_loss:0.563061, Acc_avg:64.75% Training_loss_avg:0.624151
Epoch:3 Step:2000 Training_loss:0.549433, Acc_avg:64.25% Training_loss_avg:0.625318
Epoch:3 Step:2008 Training_loss:0.516630, Acc_avg:65.00% Training_loss_avg:0.621515
Epoch:3 Step:2016 Training_loss:0.461891, Acc_avg:65.50% Training_loss_avg:0.620180
Epoch:3 Step:2024 Training_loss:0.492361, Acc_avg:66.25% Training_loss_avg:0.617153
Epoch:3 Step:2032 Training_loss:0.794196, Acc_avg:66.00% Training_loss_avg:0.621240
Epoch:3 Step:2040 Training_loss:0.508630, Acc_avg:66.50% Training_loss_avg:0.615539
Epoch:3 Step:2048 Training_loss:0.501337, Acc_avg:66.50% Training_loss_avg:0.613291
Epoch:3 Step:2056 Training_loss:0.594471, Acc_avg:66.75% Training_loss_avg:0.609001
Epoch:3 Step:2064 Training_loss:0.678731, Acc_avg:66.75% Training_loss_avg:0.608255
Epoch:3 Step:2072 Training_loss:0.694143, Acc_avg:67.00% Training_loss_avg:0.609122
Epoch:3 Step:2080 Tr

52it [00:07,  6.61it/s]


Epoch:3 Step:2232 Val_loss:0.612796, Val_Acc_avg:52.00%
Epoch:3 Step:2240 Training_loss:0.667286, Acc_avg:63.75% Training_loss_avg:0.620514
Epoch:3 Step:2248 Training_loss:0.599466, Acc_avg:63.75% Training_loss_avg:0.619645
Epoch:3 Step:2256 Training_loss:0.669369, Acc_avg:63.25% Training_loss_avg:0.625058
Epoch:3 Step:2264 Training_loss:0.628565, Acc_avg:64.00% Training_loss_avg:0.621217
Epoch:3 Step:2272 Training_loss:0.717908, Acc_avg:64.00% Training_loss_avg:0.621509
Epoch:3 Step:2280 Training_loss:0.639292, Acc_avg:64.00% Training_loss_avg:0.620231
Epoch:3 Step:2288 Training_loss:0.547653, Acc_avg:63.75% Training_loss_avg:0.623259
Epoch:3 Step:2296 Training_loss:0.501744, Acc_avg:63.25% Training_loss_avg:0.623290
Epoch:3 Step:2304 Training_loss:0.618010, Acc_avg:64.00% Training_loss_avg:0.621312
Epoch:3 Step:2312 Training_loss:0.662841, Acc_avg:63.75% Training_loss_avg:0.620799
Epoch:3 Step:2320 Training_loss:0.551172, Acc_avg:64.75% Training_loss_avg:0.618455
Epoch:3 Step:2328 Tr

52it [00:07,  6.61it/s]


Epoch:3 Step:2480 Val_loss:0.611815, Val_Acc_avg:58.00%
Epoch:3 Step:2488 Training_loss:0.722019, Acc_avg:60.00% Training_loss_avg:0.651752
Epoch:3 Step:2496 Training_loss:0.687641, Acc_avg:60.00% Training_loss_avg:0.650508
Epoch:3 Step:2504 Training_loss:0.594634, Acc_avg:60.75% Training_loss_avg:0.646777
Epoch:3 Step:2512 Training_loss:0.565653, Acc_avg:60.75% Training_loss_avg:0.645864
Epoch:3 Step:2520 Training_loss:0.662159, Acc_avg:60.50% Training_loss_avg:0.645885
Epoch:3 Step:2528 Training_loss:0.740800, Acc_avg:60.25% Training_loss_avg:0.643705
Epoch:3 Step:2536 Training_loss:0.558398, Acc_avg:61.00% Training_loss_avg:0.641254
Epoch:3 Step:2544 Training_loss:0.557866, Acc_avg:60.75% Training_loss_avg:0.643096
Epoch:3 Step:2552 Training_loss:0.684356, Acc_avg:60.75% Training_loss_avg:0.642830
Epoch:3 Step:2560 Training_loss:0.585808, Acc_avg:60.25% Training_loss_avg:0.644444
Epoch:3 Step:2568 Training_loss:0.525732, Acc_avg:61.00% Training_loss_avg:0.641144
Epoch:3 Step:2576 Tr

52it [00:07,  6.60it/s]


Epoch:3 Step:2728 Val_loss:0.613241, Val_Acc_avg:56.00%
Epoch:3 Step:2736 Training_loss:0.361693, Acc_avg:61.25% Training_loss_avg:0.636874
Epoch:3 Step:2744 Training_loss:0.613717, Acc_avg:61.50% Training_loss_avg:0.639113
Epoch:3 Step:2752 Training_loss:0.578130, Acc_avg:62.25% Training_loss_avg:0.636305
Epoch:3 Step:2760 Training_loss:0.744328, Acc_avg:62.00% Training_loss_avg:0.637313
Epoch:3 Step:2768 Training_loss:0.537354, Acc_avg:62.25% Training_loss_avg:0.637737
Epoch:3 Step:2776 Training_loss:0.515984, Acc_avg:62.50% Training_loss_avg:0.632052
Epoch:3 Step:2784 Training_loss:0.474726, Acc_avg:62.50% Training_loss_avg:0.631256
Epoch:3 Step:2792 Training_loss:0.608560, Acc_avg:62.50% Training_loss_avg:0.630870
Epoch:3 Step:2800 Training_loss:0.861412, Acc_avg:62.50% Training_loss_avg:0.638243
Epoch:3 Step:2808 Training_loss:0.606797, Acc_avg:63.25% Training_loss_avg:0.632971
Epoch:3 Step:2816 Training_loss:1.022559, Acc_avg:62.25% Training_loss_avg:0.644172
Epoch:3 Step:2824 Tr

52it [00:07,  6.61it/s]


Epoch:3 Step:2976 Val_loss:0.611915, Val_Acc_avg:52.00%
Epoch:3 Step:2984 Training_loss:0.561592, Acc_avg:66.50% Training_loss_avg:0.621024
Epoch:3 Step:2992 Training_loss:0.921028, Acc_avg:65.75% Training_loss_avg:0.625274
Epoch:3 Step:3000 Training_loss:0.670536, Acc_avg:65.25% Training_loss_avg:0.625011
Epoch:3 Step:3008 Training_loss:0.615273, Acc_avg:65.50% Training_loss_avg:0.621499
Epoch:3 Step:3016 Training_loss:0.707195, Acc_avg:65.75% Training_loss_avg:0.623031
Epoch:3 Step:3024 Training_loss:0.560007, Acc_avg:65.50% Training_loss_avg:0.623987
Epoch:3 Step:3032 Training_loss:0.806908, Acc_avg:64.50% Training_loss_avg:0.633517
Epoch:3 Step:3040 Training_loss:0.712417, Acc_avg:64.50% Training_loss_avg:0.632664
Epoch:3 Step:3048 Training_loss:0.536503, Acc_avg:65.00% Training_loss_avg:0.629027
Epoch:3 Step:3056 Training_loss:0.665390, Acc_avg:64.50% Training_loss_avg:0.632354
Epoch:3 Step:3064 Training_loss:0.612473, Acc_avg:65.00% Training_loss_avg:0.631095
Epoch:3 Step:3072 Tr

52it [00:07,  6.61it/s]


Epoch:3 Step:3224 Val_loss:0.623627, Val_Acc_avg:58.00%
Epoch:3 Step:3232 Training_loss:0.553676, Acc_avg:63.50% Training_loss_avg:0.626182
Epoch:3 Step:3240 Training_loss:0.591352, Acc_avg:63.75% Training_loss_avg:0.622062
Epoch:3 Step:3248 Training_loss:0.653640, Acc_avg:63.75% Training_loss_avg:0.623230
Epoch:3 Step:3256 Training_loss:0.639030, Acc_avg:63.75% Training_loss_avg:0.623126
Epoch:3 Step:3264 Training_loss:0.584832, Acc_avg:63.75% Training_loss_avg:0.623386
Epoch:3 Step:3272 Training_loss:0.448599, Acc_avg:64.25% Training_loss_avg:0.619796
Epoch:3 Step:3280 Training_loss:0.680479, Acc_avg:64.00% Training_loss_avg:0.622297
Epoch:3 Step:3288 Training_loss:0.743078, Acc_avg:64.00% Training_loss_avg:0.622587
Epoch:3 Step:3296 Training_loss:0.730417, Acc_avg:63.75% Training_loss_avg:0.624028
Epoch:3 Step:3304 Training_loss:0.647963, Acc_avg:63.25% Training_loss_avg:0.627585
Epoch:3 Step:3312 Training_loss:0.680302, Acc_avg:63.25% Training_loss_avg:0.625149
Epoch:3 Step:3320 Tr

52it [00:07,  6.61it/s]


Epoch:4 Step:0 Val_loss:0.620212, Val_Acc_avg:54.00%
Epoch:4 Step:8 Training_loss:0.712142, Acc_avg:63.17% Training_loss_avg:0.628791
Epoch:4 Step:16 Training_loss:0.546048, Acc_avg:62.92% Training_loss_avg:0.630365
Epoch:4 Step:24 Training_loss:0.488354, Acc_avg:63.17% Training_loss_avg:0.628417
Epoch:4 Step:32 Training_loss:0.466695, Acc_avg:63.67% Training_loss_avg:0.625150
Epoch:4 Step:40 Training_loss:0.899256, Acc_avg:62.42% Training_loss_avg:0.632693
Epoch:4 Step:48 Training_loss:0.808100, Acc_avg:61.42% Training_loss_avg:0.637623
Epoch:4 Step:56 Training_loss:0.584163, Acc_avg:62.17% Training_loss_avg:0.630886
Epoch:4 Step:64 Training_loss:0.525128, Acc_avg:62.67% Training_loss_avg:0.627977
Epoch:4 Step:72 Training_loss:0.577173, Acc_avg:62.92% Training_loss_avg:0.627215
Epoch:4 Step:80 Training_loss:0.523280, Acc_avg:63.42% Training_loss_avg:0.623537
Epoch:4 Step:88 Training_loss:0.557088, Acc_avg:63.42% Training_loss_avg:0.623479
Epoch:4 Step:96 Training_loss:0.652637, Acc_av

52it [00:07,  6.61it/s]


Epoch:4 Step:248 Val_loss:0.615010, Val_Acc_avg:54.00%
Epoch:4 Step:256 Training_loss:0.643998, Acc_avg:64.67% Training_loss_avg:0.605380
Epoch:4 Step:264 Training_loss:0.578813, Acc_avg:64.42% Training_loss_avg:0.605543
Epoch:4 Step:272 Training_loss:0.665100, Acc_avg:63.92% Training_loss_avg:0.605796
Epoch:4 Step:280 Training_loss:0.555480, Acc_avg:63.42% Training_loss_avg:0.608537
Epoch:4 Step:288 Training_loss:0.637654, Acc_avg:63.17% Training_loss_avg:0.608390
Epoch:4 Step:296 Training_loss:0.575195, Acc_avg:62.92% Training_loss_avg:0.608821
Epoch:4 Step:304 Training_loss:0.612276, Acc_avg:62.92% Training_loss_avg:0.609239
Epoch:4 Step:312 Training_loss:0.474294, Acc_avg:62.92% Training_loss_avg:0.605652
Epoch:4 Step:320 Training_loss:0.578832, Acc_avg:63.17% Training_loss_avg:0.604448
Epoch:4 Step:328 Training_loss:0.555489, Acc_avg:62.67% Training_loss_avg:0.603861
Epoch:4 Step:336 Training_loss:0.596776, Acc_avg:62.42% Training_loss_avg:0.606825
Epoch:4 Step:344 Training_loss:0

52it [00:07,  6.61it/s]


Epoch:4 Step:496 Val_loss:0.633175, Val_Acc_avg:52.00%
Epoch:4 Step:504 Training_loss:0.593331, Acc_avg:68.75% Training_loss_avg:0.574002
Epoch:4 Step:512 Training_loss:0.420014, Acc_avg:68.50% Training_loss_avg:0.574488
Epoch:4 Step:520 Training_loss:0.804736, Acc_avg:67.50% Training_loss_avg:0.580979
Epoch:4 Step:528 Training_loss:0.807322, Acc_avg:67.50% Training_loss_avg:0.586399
Epoch:4 Step:536 Training_loss:0.652678, Acc_avg:67.25% Training_loss_avg:0.588796
Epoch:4 Step:544 Training_loss:0.584143, Acc_avg:68.00% Training_loss_avg:0.586274
Epoch:4 Step:552 Training_loss:0.603952, Acc_avg:68.50% Training_loss_avg:0.584786
Epoch:4 Step:560 Training_loss:0.554929, Acc_avg:68.75% Training_loss_avg:0.584431
Epoch:4 Step:568 Training_loss:0.739397, Acc_avg:69.00% Training_loss_avg:0.585611
Epoch:4 Step:576 Training_loss:0.477878, Acc_avg:69.50% Training_loss_avg:0.581883
Epoch:4 Step:584 Training_loss:0.589340, Acc_avg:69.25% Training_loss_avg:0.585251
Epoch:4 Step:592 Training_loss:0

52it [00:07,  6.61it/s]


Epoch:4 Step:744 Val_loss:0.607305, Val_Acc_avg:54.00%
Epoch:4 Step:752 Training_loss:0.510966, Acc_avg:70.75% Training_loss_avg:0.605427
Epoch:4 Step:760 Training_loss:0.758420, Acc_avg:70.25% Training_loss_avg:0.607869
Epoch:4 Step:768 Training_loss:0.499138, Acc_avg:70.00% Training_loss_avg:0.605241
Epoch:4 Step:776 Training_loss:0.389964, Acc_avg:70.25% Training_loss_avg:0.600812
Epoch:4 Step:784 Training_loss:0.649833, Acc_avg:69.00% Training_loss_avg:0.605294
Epoch:4 Step:792 Training_loss:0.493453, Acc_avg:69.50% Training_loss_avg:0.601715
Epoch:4 Step:800 Training_loss:0.662150, Acc_avg:69.00% Training_loss_avg:0.605164
Epoch:4 Step:808 Training_loss:0.778531, Acc_avg:69.00% Training_loss_avg:0.608401
Epoch:4 Step:816 Training_loss:0.545460, Acc_avg:68.50% Training_loss_avg:0.609079
Epoch:4 Step:824 Training_loss:0.692014, Acc_avg:68.25% Training_loss_avg:0.611626
Epoch:4 Step:832 Training_loss:0.584635, Acc_avg:68.00% Training_loss_avg:0.612506
Epoch:4 Step:840 Training_loss:0

52it [00:07,  6.61it/s]


Epoch:4 Step:992 Val_loss:0.609480, Val_Acc_avg:52.00%
Epoch:4 Step:1000 Training_loss:0.554455, Acc_avg:64.25% Training_loss_avg:0.618383
Epoch:4 Step:1008 Training_loss:0.613240, Acc_avg:64.25% Training_loss_avg:0.617858
Epoch:4 Step:1016 Training_loss:0.537952, Acc_avg:64.25% Training_loss_avg:0.618465
Epoch:4 Step:1024 Training_loss:0.523135, Acc_avg:64.75% Training_loss_avg:0.609573
Epoch:4 Step:1032 Training_loss:0.967300, Acc_avg:64.75% Training_loss_avg:0.612297
Epoch:4 Step:1040 Training_loss:0.745916, Acc_avg:64.50% Training_loss_avg:0.622546
Epoch:4 Step:1048 Training_loss:0.619521, Acc_avg:64.75% Training_loss_avg:0.619642
Epoch:4 Step:1056 Training_loss:0.713017, Acc_avg:65.00% Training_loss_avg:0.616892
Epoch:4 Step:1064 Training_loss:0.728387, Acc_avg:65.00% Training_loss_avg:0.613978
Epoch:4 Step:1072 Training_loss:0.693990, Acc_avg:64.50% Training_loss_avg:0.618492
Epoch:4 Step:1080 Training_loss:0.584521, Acc_avg:64.50% Training_loss_avg:0.615535
Epoch:4 Step:1088 Tra

52it [00:07,  6.60it/s]


Epoch:4 Step:1240 Val_loss:0.611903, Val_Acc_avg:52.00%
Epoch:4 Step:1248 Training_loss:0.479642, Acc_avg:65.00% Training_loss_avg:0.606362
Epoch:4 Step:1256 Training_loss:0.474477, Acc_avg:65.25% Training_loss_avg:0.605469
Epoch:4 Step:1264 Training_loss:0.606067, Acc_avg:65.50% Training_loss_avg:0.605928
Epoch:4 Step:1272 Training_loss:0.478541, Acc_avg:66.00% Training_loss_avg:0.603826
Epoch:4 Step:1280 Training_loss:0.560906, Acc_avg:65.75% Training_loss_avg:0.604787
Epoch:4 Step:1288 Training_loss:0.609470, Acc_avg:65.25% Training_loss_avg:0.605154
Epoch:4 Step:1296 Training_loss:0.464943, Acc_avg:65.50% Training_loss_avg:0.601838
Epoch:4 Step:1304 Training_loss:0.675793, Acc_avg:64.75% Training_loss_avg:0.606559
Epoch:4 Step:1312 Training_loss:0.606637, Acc_avg:65.00% Training_loss_avg:0.604923
Epoch:4 Step:1320 Training_loss:0.340299, Acc_avg:65.25% Training_loss_avg:0.603264
Epoch:4 Step:1328 Training_loss:0.489876, Acc_avg:65.50% Training_loss_avg:0.599537
Epoch:4 Step:1336 Tr

52it [00:07,  6.61it/s]


Epoch:4 Step:1488 Val_loss:0.617995, Val_Acc_avg:54.00%
Epoch:4 Step:1496 Training_loss:0.688993, Acc_avg:67.75% Training_loss_avg:0.577358
Epoch:4 Step:1504 Training_loss:0.565095, Acc_avg:67.75% Training_loss_avg:0.576135
Epoch:4 Step:1512 Training_loss:0.573132, Acc_avg:67.50% Training_loss_avg:0.575695
Epoch:4 Step:1520 Training_loss:0.700016, Acc_avg:67.25% Training_loss_avg:0.580112
Epoch:4 Step:1528 Training_loss:0.608960, Acc_avg:67.00% Training_loss_avg:0.581731
Epoch:4 Step:1536 Training_loss:0.535063, Acc_avg:67.00% Training_loss_avg:0.581558
Epoch:4 Step:1544 Training_loss:0.544577, Acc_avg:67.00% Training_loss_avg:0.580837
Epoch:4 Step:1552 Training_loss:0.547354, Acc_avg:67.00% Training_loss_avg:0.578244
Epoch:4 Step:1560 Training_loss:0.624845, Acc_avg:67.00% Training_loss_avg:0.577026
Epoch:4 Step:1568 Training_loss:0.601077, Acc_avg:67.75% Training_loss_avg:0.575360
Epoch:4 Step:1576 Training_loss:0.555799, Acc_avg:67.25% Training_loss_avg:0.575817
Epoch:4 Step:1584 Tr

52it [00:07,  6.61it/s]


Epoch:4 Step:1736 Val_loss:0.614263, Val_Acc_avg:44.00%
Epoch:4 Step:1744 Training_loss:0.571776, Acc_avg:64.25% Training_loss_avg:0.605450
Epoch:4 Step:1752 Training_loss:0.692036, Acc_avg:63.50% Training_loss_avg:0.613059
Epoch:4 Step:1760 Training_loss:0.612418, Acc_avg:63.75% Training_loss_avg:0.613056
Epoch:4 Step:1768 Training_loss:0.663769, Acc_avg:63.50% Training_loss_avg:0.616801
Epoch:4 Step:1776 Training_loss:0.725592, Acc_avg:64.00% Training_loss_avg:0.616118
Epoch:4 Step:1784 Training_loss:0.713542, Acc_avg:63.75% Training_loss_avg:0.620797
Epoch:4 Step:1792 Training_loss:0.659322, Acc_avg:63.25% Training_loss_avg:0.625944
Epoch:4 Step:1800 Training_loss:0.725664, Acc_avg:62.50% Training_loss_avg:0.631179
Epoch:4 Step:1808 Training_loss:0.618675, Acc_avg:63.00% Training_loss_avg:0.630369
Epoch:4 Step:1816 Training_loss:0.709155, Acc_avg:62.75% Training_loss_avg:0.630665
Epoch:4 Step:1824 Training_loss:0.668843, Acc_avg:63.25% Training_loss_avg:0.625032
Epoch:4 Step:1832 Tr

52it [00:07,  6.61it/s]


Epoch:4 Step:1984 Val_loss:0.673607, Val_Acc_avg:58.00%
Epoch:4 Step:1992 Training_loss:0.631366, Acc_avg:66.50% Training_loss_avg:0.601772
Epoch:4 Step:2000 Training_loss:0.561189, Acc_avg:66.25% Training_loss_avg:0.602955
Epoch:4 Step:2008 Training_loss:0.622604, Acc_avg:65.75% Training_loss_avg:0.605418
Epoch:4 Step:2016 Training_loss:0.644250, Acc_avg:65.50% Training_loss_avg:0.607734
Epoch:4 Step:2024 Training_loss:0.534695, Acc_avg:65.75% Training_loss_avg:0.604488
Epoch:4 Step:2032 Training_loss:0.572538, Acc_avg:66.50% Training_loss_avg:0.603117
Epoch:4 Step:2040 Training_loss:0.473181, Acc_avg:67.25% Training_loss_avg:0.593688
Epoch:4 Step:2048 Training_loss:0.695173, Acc_avg:67.00% Training_loss_avg:0.595577
Epoch:4 Step:2056 Training_loss:0.563282, Acc_avg:67.50% Training_loss_avg:0.591033
Epoch:4 Step:2064 Training_loss:0.453964, Acc_avg:67.50% Training_loss_avg:0.591763
Epoch:4 Step:2072 Training_loss:0.702762, Acc_avg:68.00% Training_loss_avg:0.588463
Epoch:4 Step:2080 Tr

52it [00:07,  6.61it/s]


Epoch:4 Step:2232 Val_loss:0.667526, Val_Acc_avg:58.00%
Epoch:4 Step:2240 Training_loss:0.599392, Acc_avg:69.75% Training_loss_avg:0.574567
Epoch:4 Step:2248 Training_loss:0.643544, Acc_avg:69.50% Training_loss_avg:0.573561
Epoch:4 Step:2256 Training_loss:0.781051, Acc_avg:69.00% Training_loss_avg:0.581787
Epoch:4 Step:2264 Training_loss:0.611804, Acc_avg:69.00% Training_loss_avg:0.582794
Epoch:4 Step:2272 Training_loss:0.860749, Acc_avg:68.25% Training_loss_avg:0.589916
Epoch:4 Step:2280 Training_loss:0.712030, Acc_avg:67.75% Training_loss_avg:0.596207
Epoch:4 Step:2288 Training_loss:0.674811, Acc_avg:67.25% Training_loss_avg:0.602271
Epoch:4 Step:2296 Training_loss:0.682528, Acc_avg:67.00% Training_loss_avg:0.604830
Epoch:4 Step:2304 Training_loss:0.783360, Acc_avg:66.75% Training_loss_avg:0.606100
Epoch:4 Step:2312 Training_loss:0.707480, Acc_avg:66.75% Training_loss_avg:0.609272
Epoch:4 Step:2320 Training_loss:0.544816, Acc_avg:66.25% Training_loss_avg:0.611949
Epoch:4 Step:2328 Tr

52it [00:07,  6.61it/s]


Epoch:4 Step:2480 Val_loss:0.610115, Val_Acc_avg:48.00%
Epoch:4 Step:2488 Training_loss:0.593864, Acc_avg:64.75% Training_loss_avg:0.623165
Epoch:4 Step:2496 Training_loss:0.554610, Acc_avg:65.25% Training_loss_avg:0.621138
Epoch:4 Step:2504 Training_loss:0.616277, Acc_avg:64.25% Training_loss_avg:0.625444
Epoch:4 Step:2512 Training_loss:0.530847, Acc_avg:64.25% Training_loss_avg:0.626817
Epoch:4 Step:2520 Training_loss:0.692956, Acc_avg:64.50% Training_loss_avg:0.627673
Epoch:4 Step:2528 Training_loss:0.868759, Acc_avg:63.50% Training_loss_avg:0.631435
Epoch:4 Step:2536 Training_loss:0.638038, Acc_avg:63.00% Training_loss_avg:0.634423
Epoch:4 Step:2544 Training_loss:0.608844, Acc_avg:63.00% Training_loss_avg:0.634115
Epoch:4 Step:2552 Training_loss:0.520663, Acc_avg:63.75% Training_loss_avg:0.631679
Epoch:4 Step:2560 Training_loss:0.579088, Acc_avg:64.00% Training_loss_avg:0.630839
Epoch:4 Step:2568 Training_loss:0.590518, Acc_avg:63.50% Training_loss_avg:0.634789
Epoch:4 Step:2576 Tr

52it [00:07,  6.60it/s]


Epoch:4 Step:2728 Val_loss:0.612235, Val_Acc_avg:54.00%
Epoch:4 Step:2736 Training_loss:0.539447, Acc_avg:65.50% Training_loss_avg:0.608256
Epoch:4 Step:2744 Training_loss:0.529305, Acc_avg:65.25% Training_loss_avg:0.611324
Epoch:4 Step:2752 Training_loss:0.573081, Acc_avg:65.00% Training_loss_avg:0.611113
Epoch:4 Step:2760 Training_loss:0.765467, Acc_avg:64.75% Training_loss_avg:0.613746
Epoch:4 Step:2768 Training_loss:0.564825, Acc_avg:64.75% Training_loss_avg:0.613173
Epoch:4 Step:2776 Training_loss:0.447913, Acc_avg:65.75% Training_loss_avg:0.608033
Epoch:4 Step:2784 Training_loss:0.549314, Acc_avg:66.25% Training_loss_avg:0.607126
Epoch:4 Step:2792 Training_loss:0.623063, Acc_avg:66.00% Training_loss_avg:0.607707
Epoch:4 Step:2800 Training_loss:0.757162, Acc_avg:65.50% Training_loss_avg:0.611219
Epoch:4 Step:2808 Training_loss:0.619737, Acc_avg:65.50% Training_loss_avg:0.611435
Epoch:4 Step:2816 Training_loss:0.600429, Acc_avg:66.25% Training_loss_avg:0.609289
Epoch:4 Step:2824 Tr

52it [00:07,  6.61it/s]


Epoch:4 Step:2976 Val_loss:0.631534, Val_Acc_avg:52.00%
Epoch:4 Step:2984 Training_loss:0.831910, Acc_avg:66.00% Training_loss_avg:0.611208
Epoch:4 Step:2992 Training_loss:0.481054, Acc_avg:65.75% Training_loss_avg:0.612532
Epoch:4 Step:3000 Training_loss:0.420478, Acc_avg:66.00% Training_loss_avg:0.610802
Epoch:4 Step:3008 Training_loss:0.454510, Acc_avg:66.00% Training_loss_avg:0.610176
Epoch:4 Step:3016 Training_loss:0.412283, Acc_avg:66.25% Training_loss_avg:0.608081
Epoch:4 Step:3024 Training_loss:0.612662, Acc_avg:66.00% Training_loss_avg:0.605293
Epoch:4 Step:3032 Training_loss:0.609820, Acc_avg:66.50% Training_loss_avg:0.601040
Epoch:4 Step:3040 Training_loss:0.669935, Acc_avg:66.50% Training_loss_avg:0.600386
Epoch:4 Step:3048 Training_loss:0.818671, Acc_avg:67.00% Training_loss_avg:0.599390
Epoch:4 Step:3056 Training_loss:0.457971, Acc_avg:68.00% Training_loss_avg:0.593418
Epoch:4 Step:3064 Training_loss:0.621092, Acc_avg:68.00% Training_loss_avg:0.593940
Epoch:4 Step:3072 Tr

52it [00:07,  6.61it/s]


Epoch:4 Step:3224 Val_loss:0.616687, Val_Acc_avg:54.00%
Epoch:4 Step:3232 Training_loss:0.479017, Acc_avg:68.00% Training_loss_avg:0.588731
Epoch:4 Step:3240 Training_loss:0.649384, Acc_avg:67.75% Training_loss_avg:0.591542
Epoch:4 Step:3248 Training_loss:0.830330, Acc_avg:66.50% Training_loss_avg:0.596544
Epoch:4 Step:3256 Training_loss:0.769595, Acc_avg:65.75% Training_loss_avg:0.598551
Epoch:4 Step:3264 Training_loss:0.555436, Acc_avg:66.00% Training_loss_avg:0.596228
Epoch:4 Step:3272 Training_loss:0.592162, Acc_avg:65.75% Training_loss_avg:0.596068
Epoch:4 Step:3280 Training_loss:0.385554, Acc_avg:66.00% Training_loss_avg:0.591919
Epoch:4 Step:3288 Training_loss:0.606692, Acc_avg:66.75% Training_loss_avg:0.589327
Epoch:4 Step:3296 Training_loss:0.425845, Acc_avg:66.50% Training_loss_avg:0.588118
Epoch:4 Step:3304 Training_loss:0.583959, Acc_avg:66.50% Training_loss_avg:0.588773
Epoch:4 Step:3312 Training_loss:0.590704, Acc_avg:66.25% Training_loss_avg:0.589352
Epoch:4 Step:3320 Tr

In [179]:
print(dir_name)

codebert_finetune_runs/lr_5e-5 redo, 512 split with attention! custom model redo 3


In [64]:
def split_loader(run_dir):
    data_type = ['train', 'val', 'test']
    data_split_type = ['X', 'A', 'Y']

    split_list = []

    for data_type_id in data_type:
      for split_type in data_split_type:
          with open('{}/{}_{}.pickle'.format(run_dir,data_type_id, split_type), 'rb') as input_file:
            object_file = pickle.load(input_file)
          split_list.append(object_file)
    X_train, A_train, Y_train, X_val, A_val, Y_val, X_test, A_test, Y_test = split_list

    return (X_train, A_train, Y_train), (X_val, A_val, Y_val), (X_test, A_test, Y_test)

In [173]:
def main():
    """
    Main configuration function for a given finetune run
    :return: None
    """
    run_name = "lr_5e-6 redo, 512 split with attention! redo5"
    run_dir = "codebert_finetune_runs/{}".format(run_name)
    model_name = 'codebert-base'
    checkpoint_location = None
    online = False
    load_splits = False
    save_data = True

    if load_splits:
      train_data, val_data, test_data = split_loader(run_dir)
      X_train, A_train, Y_train = train_data
      X_val, A_val, Y_val = val_data
      X_test, A_test, Y_test = test_data
      print("loaded data splits")

    else:
      print("generating data splits")

      code_df = preprocess_data(file_loc='code_dataset.jsonl')
      train_data, val_data, test_data = tokenize(code_df, model_name=model_name)

      X_train, A_train, Y_train = train_data
      X_val, A_val, Y_val = val_data
      X_test, A_test, Y_test = test_data

      data_type = ['train', 'val', 'test']
      data_split_type = ['X', 'A', 'Y']

      print(X_train.shape)
    # Creating dir to save logs and checkpoints, re
    dir_name = "{}".format(run_dir)
    if os.path.exists(dir_name):
        input("run name already exists, press Enter to overwrite")
    else:
        os.makedirs(dir_name)


    if save_data:
      print("saving data splits")

      data_all = [train_data, val_data, test_data]
      for i, data in enumerate(data_all):
        for j, split in enumerate(data):
          with open('{}/{}_{}.pickle'.format(run_dir,data_type[i], data_split_type[j]), 'wb') as handle:
            pickle.dump(split, handle)


    print(train_data[0].shape)
    print("Data points: {}".format(len(train_data)))

    # Loading model from checkpoint if location provided
    if online:
        print("loading model from online")
        model = AutoModelForSequenceClassification.from_pretrained("microsoft/codebert-base")
    elif checkpoint_location is None:
        print("loading model from local repo")

        model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)
    else:
        print("loading model from checkpoint: {}".format(checkpoint_location))
        model = AutoModelForSequenceClassification.from_pretrained(checkpoint_location)

    train(model=model,
          train_data=train_data,
          val_data=val_data,
          epochs=5,
          batch_size=8,
          learning_rate=5e-6,
          validate_per=250,
          run_name=run_name,
          run_descrption="lr_5e-6 redo, 256 split, saving splits")


In [174]:
torch.cuda.empty_cache()
main()

generating data splits
Insecure code counts: 3729, Total code counts: 8000, Proportion 0.466125
Data points: 8000




torch.Size([3331, 512])
saving data splits
torch.Size([3331, 512])
Data points: 3
loading model from local repo


Some weights of the model checkpoint at codebert-base were not used when initializing RobertaForSequenceClassification: ['pooler.dense.weight', 'pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at codebert-base and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.weight', 'classifier.out_proj.weight', 'classifier.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for p

Epoch:0 Step:0 Training_loss:0.717449, Acc_avg:25.00% Training_loss_avg:0.717449
Validating:


52it [00:07,  6.60it/s]


Epoch:0 Step:0 Val_loss:0.687055, Val_Acc_avg:56.50%
Epoch:0 Step:8 Training_loss:0.715828, Acc_avg:31.25% Training_loss_avg:0.716639
Epoch:0 Step:16 Training_loss:0.662689, Acc_avg:41.67% Training_loss_avg:0.698655
Epoch:0 Step:24 Training_loss:0.699397, Acc_avg:40.62% Training_loss_avg:0.698841
Epoch:0 Step:32 Training_loss:0.648268, Acc_avg:47.50% Training_loss_avg:0.688726
Epoch:0 Step:40 Training_loss:0.686468, Acc_avg:50.00% Training_loss_avg:0.688350
Epoch:0 Step:48 Training_loss:0.697716, Acc_avg:50.00% Training_loss_avg:0.689688
Epoch:0 Step:56 Training_loss:0.725654, Acc_avg:50.00% Training_loss_avg:0.694184
Epoch:0 Step:64 Training_loss:0.696265, Acc_avg:50.00% Training_loss_avg:0.694415
Epoch:0 Step:72 Training_loss:0.737185, Acc_avg:47.50% Training_loss_avg:0.698692
Epoch:0 Step:80 Training_loss:0.734309, Acc_avg:46.59% Training_loss_avg:0.701930
Epoch:0 Step:88 Training_loss:0.726634, Acc_avg:45.83% Training_loss_avg:0.703989
Epoch:0 Step:96 Training_loss:0.702518, Acc_av

52it [00:07,  6.60it/s]


Epoch:0 Step:248 Val_loss:0.687222, Val_Acc_avg:56.75%
Epoch:0 Step:256 Training_loss:0.608659, Acc_avg:50.38% Training_loss_avg:0.693884
Epoch:0 Step:264 Training_loss:0.709334, Acc_avg:50.37% Training_loss_avg:0.694338
Epoch:0 Step:272 Training_loss:0.715710, Acc_avg:50.00% Training_loss_avg:0.694949
Epoch:0 Step:280 Training_loss:0.667320, Acc_avg:50.00% Training_loss_avg:0.694181
Epoch:0 Step:288 Training_loss:0.706716, Acc_avg:50.00% Training_loss_avg:0.694520
Epoch:0 Step:296 Training_loss:0.676820, Acc_avg:50.99% Training_loss_avg:0.694054
Epoch:0 Step:304 Training_loss:0.646579, Acc_avg:51.60% Training_loss_avg:0.692837
Epoch:0 Step:312 Training_loss:0.720837, Acc_avg:51.56% Training_loss_avg:0.693537
Epoch:0 Step:320 Training_loss:0.728157, Acc_avg:50.61% Training_loss_avg:0.694381
Epoch:0 Step:328 Training_loss:0.676708, Acc_avg:50.89% Training_loss_avg:0.693961
Epoch:0 Step:336 Training_loss:0.676730, Acc_avg:51.16% Training_loss_avg:0.693560
Epoch:0 Step:344 Training_loss:0

52it [00:07,  6.60it/s]


Epoch:0 Step:496 Val_loss:0.685410, Val_Acc_avg:56.00%
Epoch:0 Step:504 Training_loss:0.643047, Acc_avg:56.25% Training_loss_avg:0.682259
Epoch:0 Step:512 Training_loss:0.720236, Acc_avg:56.25% Training_loss_avg:0.683064
Epoch:0 Step:520 Training_loss:0.643785, Acc_avg:56.75% Training_loss_avg:0.682048
Epoch:0 Step:528 Training_loss:0.687666, Acc_avg:56.25% Training_loss_avg:0.682477
Epoch:0 Step:536 Training_loss:0.553736, Acc_avg:57.75% Training_loss_avg:0.678833
Epoch:0 Step:544 Training_loss:0.776075, Acc_avg:57.25% Training_loss_avg:0.680596
Epoch:0 Step:552 Training_loss:0.590756, Acc_avg:58.00% Training_loss_avg:0.677107
Epoch:0 Step:560 Training_loss:0.633909, Acc_avg:58.25% Training_loss_avg:0.675479
Epoch:0 Step:568 Training_loss:0.644201, Acc_avg:59.25% Training_loss_avg:0.673562
Epoch:0 Step:576 Training_loss:0.808083, Acc_avg:58.75% Training_loss_avg:0.675787
Epoch:0 Step:584 Training_loss:0.678407, Acc_avg:59.00% Training_loss_avg:0.676429
Epoch:0 Step:592 Training_loss:0

52it [00:07,  6.60it/s]


Epoch:0 Step:744 Val_loss:0.686264, Val_Acc_avg:56.75%
Epoch:0 Step:752 Training_loss:0.639110, Acc_avg:57.25% Training_loss_avg:0.690188
Epoch:0 Step:760 Training_loss:0.700001, Acc_avg:57.25% Training_loss_avg:0.691057
Epoch:0 Step:768 Training_loss:0.663938, Acc_avg:57.75% Training_loss_avg:0.691009
Epoch:0 Step:776 Training_loss:0.669895, Acc_avg:57.75% Training_loss_avg:0.692077
Epoch:0 Step:784 Training_loss:0.677190, Acc_avg:56.75% Training_loss_avg:0.694172
Epoch:0 Step:792 Training_loss:0.709374, Acc_avg:56.50% Training_loss_avg:0.696052
Epoch:0 Step:800 Training_loss:0.740091, Acc_avg:56.00% Training_loss_avg:0.697993
Epoch:0 Step:808 Training_loss:0.669749, Acc_avg:56.75% Training_loss_avg:0.696890
Epoch:0 Step:816 Training_loss:0.711397, Acc_avg:56.75% Training_loss_avg:0.696162
Epoch:0 Step:824 Training_loss:0.678199, Acc_avg:57.25% Training_loss_avg:0.695730
Epoch:0 Step:832 Training_loss:0.792253, Acc_avg:57.00% Training_loss_avg:0.695985
Epoch:0 Step:840 Training_loss:0

52it [00:07,  6.60it/s]


Epoch:0 Step:992 Val_loss:0.683721, Val_Acc_avg:56.00%
Epoch:0 Step:1000 Training_loss:0.660755, Acc_avg:55.00% Training_loss_avg:0.697723
Epoch:0 Step:1008 Training_loss:0.674299, Acc_avg:55.50% Training_loss_avg:0.696587
Epoch:0 Step:1016 Training_loss:0.693143, Acc_avg:56.00% Training_loss_avg:0.693416
Epoch:0 Step:1024 Training_loss:0.727477, Acc_avg:55.50% Training_loss_avg:0.692173
Epoch:0 Step:1032 Training_loss:0.675692, Acc_avg:55.00% Training_loss_avg:0.692717
Epoch:0 Step:1040 Training_loss:0.622910, Acc_avg:55.50% Training_loss_avg:0.689577
Epoch:0 Step:1048 Training_loss:0.646731, Acc_avg:55.75% Training_loss_avg:0.688466
Epoch:0 Step:1056 Training_loss:0.666067, Acc_avg:55.25% Training_loss_avg:0.688035
Epoch:0 Step:1064 Training_loss:0.784648, Acc_avg:54.75% Training_loss_avg:0.690104
Epoch:0 Step:1072 Training_loss:0.565588, Acc_avg:56.00% Training_loss_avg:0.686893
Epoch:0 Step:1080 Training_loss:0.673248, Acc_avg:56.25% Training_loss_avg:0.686468
Epoch:0 Step:1088 Tra

52it [00:07,  6.60it/s]


Epoch:0 Step:1240 Val_loss:0.694643, Val_Acc_avg:56.00%
Epoch:0 Step:1248 Training_loss:0.751404, Acc_avg:57.25% Training_loss_avg:0.671855
Epoch:0 Step:1256 Training_loss:0.602939, Acc_avg:57.75% Training_loss_avg:0.670136
Epoch:0 Step:1264 Training_loss:0.764007, Acc_avg:57.50% Training_loss_avg:0.671871
Epoch:0 Step:1272 Training_loss:0.588339, Acc_avg:57.75% Training_loss_avg:0.670598
Epoch:0 Step:1280 Training_loss:0.698767, Acc_avg:58.00% Training_loss_avg:0.671203
Epoch:0 Step:1288 Training_loss:0.601909, Acc_avg:58.75% Training_loss_avg:0.669453
Epoch:0 Step:1296 Training_loss:0.668582, Acc_avg:59.00% Training_loss_avg:0.669508
Epoch:0 Step:1304 Training_loss:0.706978, Acc_avg:59.00% Training_loss_avg:0.669991
Epoch:0 Step:1312 Training_loss:0.686734, Acc_avg:58.75% Training_loss_avg:0.670199
Epoch:0 Step:1320 Training_loss:0.767153, Acc_avg:59.00% Training_loss_avg:0.671373
Epoch:0 Step:1328 Training_loss:0.498783, Acc_avg:59.50% Training_loss_avg:0.668318
Epoch:0 Step:1336 Tr

52it [00:07,  6.60it/s]


Epoch:0 Step:1488 Val_loss:0.681458, Val_Acc_avg:56.00%
Epoch:0 Step:1496 Training_loss:0.657977, Acc_avg:57.75% Training_loss_avg:0.678040
Epoch:0 Step:1504 Training_loss:0.796338, Acc_avg:57.50% Training_loss_avg:0.679682
Epoch:0 Step:1512 Training_loss:0.698041, Acc_avg:57.00% Training_loss_avg:0.681555
Epoch:0 Step:1520 Training_loss:0.712959, Acc_avg:56.25% Training_loss_avg:0.683804
Epoch:0 Step:1528 Training_loss:0.665778, Acc_avg:56.50% Training_loss_avg:0.683272
Epoch:0 Step:1536 Training_loss:0.732907, Acc_avg:55.25% Training_loss_avg:0.685771
Epoch:0 Step:1544 Training_loss:0.666655, Acc_avg:55.75% Training_loss_avg:0.685350
Epoch:0 Step:1552 Training_loss:0.655627, Acc_avg:56.25% Training_loss_avg:0.684286
Epoch:0 Step:1560 Training_loss:0.755158, Acc_avg:56.00% Training_loss_avg:0.683733
Epoch:0 Step:1568 Training_loss:0.697854, Acc_avg:55.25% Training_loss_avg:0.686575
Epoch:0 Step:1576 Training_loss:0.671135, Acc_avg:55.00% Training_loss_avg:0.687362
Epoch:0 Step:1584 Tr

52it [00:07,  6.60it/s]


Epoch:0 Step:1736 Val_loss:0.680742, Val_Acc_avg:56.50%
Epoch:0 Step:1744 Training_loss:0.662957, Acc_avg:54.00% Training_loss_avg:0.699122
Epoch:0 Step:1752 Training_loss:0.674963, Acc_avg:54.00% Training_loss_avg:0.698555
Epoch:0 Step:1760 Training_loss:0.664203, Acc_avg:54.00% Training_loss_avg:0.699071
Epoch:0 Step:1768 Training_loss:0.635423, Acc_avg:54.25% Training_loss_avg:0.697559
Epoch:0 Step:1776 Training_loss:0.657496, Acc_avg:53.75% Training_loss_avg:0.698675
Epoch:0 Step:1784 Training_loss:0.685832, Acc_avg:53.75% Training_loss_avg:0.696754
Epoch:0 Step:1792 Training_loss:0.785633, Acc_avg:53.25% Training_loss_avg:0.697380
Epoch:0 Step:1800 Training_loss:0.771549, Acc_avg:52.75% Training_loss_avg:0.700207
Epoch:0 Step:1808 Training_loss:0.677451, Acc_avg:52.50% Training_loss_avg:0.701595
Epoch:0 Step:1816 Training_loss:0.711757, Acc_avg:52.25% Training_loss_avg:0.702288
Epoch:0 Step:1824 Training_loss:0.592360, Acc_avg:52.50% Training_loss_avg:0.701822
Epoch:0 Step:1832 Tr

52it [00:07,  6.60it/s]


Epoch:0 Step:1984 Val_loss:0.686190, Val_Acc_avg:56.00%
Epoch:0 Step:1992 Training_loss:0.740846, Acc_avg:58.00% Training_loss_avg:0.683114
Epoch:0 Step:2000 Training_loss:0.496577, Acc_avg:58.25% Training_loss_avg:0.679776
Epoch:0 Step:2008 Training_loss:0.728198, Acc_avg:58.50% Training_loss_avg:0.679529
Epoch:0 Step:2016 Training_loss:0.748792, Acc_avg:58.50% Training_loss_avg:0.680068
Epoch:0 Step:2024 Training_loss:0.667805, Acc_avg:59.00% Training_loss_avg:0.679638
Epoch:0 Step:2032 Training_loss:0.535557, Acc_avg:59.75% Training_loss_avg:0.676221
Epoch:0 Step:2040 Training_loss:0.657938, Acc_avg:59.75% Training_loss_avg:0.675567
Epoch:0 Step:2048 Training_loss:0.668196, Acc_avg:59.50% Training_loss_avg:0.675065
Epoch:0 Step:2056 Training_loss:0.809075, Acc_avg:58.50% Training_loss_avg:0.677948
Epoch:0 Step:2064 Training_loss:0.566436, Acc_avg:59.00% Training_loss_avg:0.675131
Epoch:0 Step:2072 Training_loss:0.746047, Acc_avg:58.75% Training_loss_avg:0.677217
Epoch:0 Step:2080 Tr

52it [00:07,  6.60it/s]


Epoch:0 Step:2232 Val_loss:0.687215, Val_Acc_avg:56.00%
Epoch:0 Step:2240 Training_loss:0.735511, Acc_avg:60.25% Training_loss_avg:0.676833
Epoch:0 Step:2248 Training_loss:0.612006, Acc_avg:60.50% Training_loss_avg:0.675472
Epoch:0 Step:2256 Training_loss:0.946209, Acc_avg:59.25% Training_loss_avg:0.681443
Epoch:0 Step:2264 Training_loss:0.697034, Acc_avg:59.25% Training_loss_avg:0.681249
Epoch:0 Step:2272 Training_loss:0.612506, Acc_avg:59.50% Training_loss_avg:0.679545
Epoch:0 Step:2280 Training_loss:0.637340, Acc_avg:60.00% Training_loss_avg:0.679000
Epoch:0 Step:2288 Training_loss:0.728105, Acc_avg:60.25% Training_loss_avg:0.677586
Epoch:0 Step:2296 Training_loss:0.736109, Acc_avg:60.00% Training_loss_avg:0.678534
Epoch:0 Step:2304 Training_loss:0.642898, Acc_avg:60.50% Training_loss_avg:0.677007
Epoch:0 Step:2312 Training_loss:0.707942, Acc_avg:60.00% Training_loss_avg:0.679582
Epoch:0 Step:2320 Training_loss:0.752346, Acc_avg:60.00% Training_loss_avg:0.680486
Epoch:0 Step:2328 Tr

52it [00:07,  6.60it/s]


Epoch:0 Step:2480 Val_loss:0.680126, Val_Acc_avg:56.25%
Epoch:0 Step:2488 Training_loss:0.696653, Acc_avg:55.50% Training_loss_avg:0.689895
Epoch:0 Step:2496 Training_loss:0.721766, Acc_avg:55.50% Training_loss_avg:0.689128
Epoch:0 Step:2504 Training_loss:0.721152, Acc_avg:55.25% Training_loss_avg:0.690071
Epoch:0 Step:2512 Training_loss:0.710010, Acc_avg:54.50% Training_loss_avg:0.691765
Epoch:0 Step:2520 Training_loss:0.604106, Acc_avg:55.25% Training_loss_avg:0.687899
Epoch:0 Step:2528 Training_loss:0.683268, Acc_avg:54.75% Training_loss_avg:0.691912
Epoch:0 Step:2536 Training_loss:0.746571, Acc_avg:54.00% Training_loss_avg:0.694738
Epoch:0 Step:2544 Training_loss:0.574940, Acc_avg:54.75% Training_loss_avg:0.691157
Epoch:0 Step:2552 Training_loss:0.652931, Acc_avg:55.25% Training_loss_avg:0.687765
Epoch:0 Step:2560 Training_loss:0.688611, Acc_avg:55.00% Training_loss_avg:0.689672
Epoch:0 Step:2568 Training_loss:0.650764, Acc_avg:55.00% Training_loss_avg:0.688681
Epoch:0 Step:2576 Tr

52it [00:07,  6.60it/s]


Epoch:0 Step:2728 Val_loss:0.678390, Val_Acc_avg:57.25%
Epoch:0 Step:2736 Training_loss:0.627504, Acc_avg:56.00% Training_loss_avg:0.684238
Epoch:0 Step:2744 Training_loss:0.645955, Acc_avg:56.50% Training_loss_avg:0.682854
Epoch:0 Step:2752 Training_loss:0.715727, Acc_avg:56.25% Training_loss_avg:0.684239
Epoch:0 Step:2760 Training_loss:0.624524, Acc_avg:56.75% Training_loss_avg:0.682563
Epoch:0 Step:2768 Training_loss:0.714952, Acc_avg:56.50% Training_loss_avg:0.682993
Epoch:0 Step:2776 Training_loss:0.726510, Acc_avg:55.50% Training_loss_avg:0.684601
Epoch:0 Step:2784 Training_loss:0.697274, Acc_avg:55.00% Training_loss_avg:0.686103
Epoch:0 Step:2792 Training_loss:0.630217, Acc_avg:56.00% Training_loss_avg:0.684316
Epoch:0 Step:2800 Training_loss:0.618836, Acc_avg:57.00% Training_loss_avg:0.682707
Epoch:0 Step:2808 Training_loss:0.649629, Acc_avg:57.75% Training_loss_avg:0.682019
Epoch:0 Step:2816 Training_loss:0.678952, Acc_avg:57.75% Training_loss_avg:0.681443
Epoch:0 Step:2824 Tr

52it [00:07,  6.60it/s]


Epoch:0 Step:2976 Val_loss:0.681356, Val_Acc_avg:56.75%
Epoch:0 Step:2984 Training_loss:0.624297, Acc_avg:56.00% Training_loss_avg:0.686134
Epoch:0 Step:2992 Training_loss:0.692513, Acc_avg:56.00% Training_loss_avg:0.685246
Epoch:0 Step:3000 Training_loss:0.690765, Acc_avg:55.25% Training_loss_avg:0.685864
Epoch:0 Step:3008 Training_loss:0.686234, Acc_avg:55.00% Training_loss_avg:0.687128
Epoch:0 Step:3016 Training_loss:0.701379, Acc_avg:54.75% Training_loss_avg:0.687284
Epoch:0 Step:3024 Training_loss:0.727869, Acc_avg:54.50% Training_loss_avg:0.689073
Epoch:0 Step:3032 Training_loss:0.649080, Acc_avg:55.00% Training_loss_avg:0.687451
Epoch:0 Step:3040 Training_loss:0.652567, Acc_avg:54.75% Training_loss_avg:0.687979
Epoch:0 Step:3048 Training_loss:0.710327, Acc_avg:54.25% Training_loss_avg:0.688749
Epoch:0 Step:3056 Training_loss:0.777309, Acc_avg:54.00% Training_loss_avg:0.689995
Epoch:0 Step:3064 Training_loss:0.671688, Acc_avg:53.25% Training_loss_avg:0.690911
Epoch:0 Step:3072 Tr

52it [00:07,  6.60it/s]


Epoch:0 Step:3224 Val_loss:0.673632, Val_Acc_avg:58.25%
Epoch:0 Step:3232 Training_loss:0.620965, Acc_avg:55.25% Training_loss_avg:0.683007
Epoch:0 Step:3240 Training_loss:0.603717, Acc_avg:55.75% Training_loss_avg:0.681782
Epoch:0 Step:3248 Training_loss:0.834247, Acc_avg:55.50% Training_loss_avg:0.684461
Epoch:0 Step:3256 Training_loss:0.840662, Acc_avg:54.75% Training_loss_avg:0.687362
Epoch:0 Step:3264 Training_loss:0.571840, Acc_avg:56.00% Training_loss_avg:0.683813
Epoch:0 Step:3272 Training_loss:0.743902, Acc_avg:56.50% Training_loss_avg:0.683464
Epoch:0 Step:3280 Training_loss:0.764528, Acc_avg:55.75% Training_loss_avg:0.685789
Epoch:0 Step:3288 Training_loss:0.695405, Acc_avg:56.25% Training_loss_avg:0.684884
Epoch:0 Step:3296 Training_loss:0.624538, Acc_avg:56.75% Training_loss_avg:0.684118
Epoch:0 Step:3304 Training_loss:0.730971, Acc_avg:56.50% Training_loss_avg:0.684875
Epoch:0 Step:3312 Training_loss:0.670354, Acc_avg:56.50% Training_loss_avg:0.684688
Epoch:0 Step:3320 Tr

52it [00:07,  6.60it/s]


Epoch:1 Step:0 Val_loss:0.674877, Val_Acc_avg:58.00%
Epoch:1 Step:8 Training_loss:0.682059, Acc_avg:55.67% Training_loss_avg:0.686104
Epoch:1 Step:16 Training_loss:0.616148, Acc_avg:56.67% Training_loss_avg:0.683383
Epoch:1 Step:24 Training_loss:0.656142, Acc_avg:56.42% Training_loss_avg:0.683238
Epoch:1 Step:32 Training_loss:0.722931, Acc_avg:56.67% Training_loss_avg:0.683870
Epoch:1 Step:40 Training_loss:0.759030, Acc_avg:57.17% Training_loss_avg:0.684313
Epoch:1 Step:48 Training_loss:0.723478, Acc_avg:56.17% Training_loss_avg:0.686297
Epoch:1 Step:56 Training_loss:0.737444, Acc_avg:55.92% Training_loss_avg:0.687196
Epoch:1 Step:64 Training_loss:0.703319, Acc_avg:55.92% Training_loss_avg:0.687447
Epoch:1 Step:72 Training_loss:0.645380, Acc_avg:56.17% Training_loss_avg:0.686630
Epoch:1 Step:80 Training_loss:0.641313, Acc_avg:56.67% Training_loss_avg:0.685428
Epoch:1 Step:88 Training_loss:0.672121, Acc_avg:56.42% Training_loss_avg:0.684313
Epoch:1 Step:96 Training_loss:0.659939, Acc_av

52it [00:07,  6.60it/s]


Epoch:1 Step:248 Val_loss:0.675613, Val_Acc_avg:59.25%
Epoch:1 Step:256 Training_loss:0.753333, Acc_avg:56.67% Training_loss_avg:0.687031
Epoch:1 Step:264 Training_loss:0.822795, Acc_avg:55.67% Training_loss_avg:0.691179
Epoch:1 Step:272 Training_loss:0.603464, Acc_avg:56.67% Training_loss_avg:0.688909
Epoch:1 Step:280 Training_loss:0.687121, Acc_avg:56.42% Training_loss_avg:0.689126
Epoch:1 Step:288 Training_loss:0.678989, Acc_avg:56.17% Training_loss_avg:0.688325
Epoch:1 Step:296 Training_loss:0.686378, Acc_avg:55.92% Training_loss_avg:0.689633
Epoch:1 Step:304 Training_loss:0.661019, Acc_avg:55.42% Training_loss_avg:0.690779
Epoch:1 Step:312 Training_loss:0.661585, Acc_avg:56.42% Training_loss_avg:0.687326
Epoch:1 Step:320 Training_loss:0.599056, Acc_avg:57.92% Training_loss_avg:0.682494
Epoch:1 Step:328 Training_loss:0.604034, Acc_avg:57.67% Training_loss_avg:0.683137
Epoch:1 Step:336 Training_loss:0.678279, Acc_avg:57.67% Training_loss_avg:0.681825
Epoch:1 Step:344 Training_loss:0

52it [00:07,  6.60it/s]


Epoch:1 Step:496 Val_loss:0.670837, Val_Acc_avg:61.00%
Epoch:1 Step:504 Training_loss:0.703397, Acc_avg:58.25% Training_loss_avg:0.678382
Epoch:1 Step:512 Training_loss:0.677681, Acc_avg:57.75% Training_loss_avg:0.679784
Epoch:1 Step:520 Training_loss:0.645546, Acc_avg:58.25% Training_loss_avg:0.677417
Epoch:1 Step:528 Training_loss:0.731281, Acc_avg:58.00% Training_loss_avg:0.679215
Epoch:1 Step:536 Training_loss:0.647427, Acc_avg:58.25% Training_loss_avg:0.678869
Epoch:1 Step:544 Training_loss:0.631616, Acc_avg:58.50% Training_loss_avg:0.678057
Epoch:1 Step:552 Training_loss:0.666355, Acc_avg:58.00% Training_loss_avg:0.679312
Epoch:1 Step:560 Training_loss:0.682600, Acc_avg:58.75% Training_loss_avg:0.676989
Epoch:1 Step:568 Training_loss:0.764378, Acc_avg:58.50% Training_loss_avg:0.677638
Epoch:1 Step:576 Training_loss:0.700676, Acc_avg:58.50% Training_loss_avg:0.679384
Epoch:1 Step:584 Training_loss:0.794473, Acc_avg:58.50% Training_loss_avg:0.680666
Epoch:1 Step:592 Training_loss:0

52it [00:07,  6.60it/s]


Epoch:1 Step:744 Val_loss:0.669965, Val_Acc_avg:60.50%
Epoch:1 Step:752 Training_loss:0.560207, Acc_avg:58.50% Training_loss_avg:0.675233
Epoch:1 Step:760 Training_loss:0.649976, Acc_avg:58.50% Training_loss_avg:0.674656
Epoch:1 Step:768 Training_loss:0.730511, Acc_avg:58.00% Training_loss_avg:0.676428
Epoch:1 Step:776 Training_loss:0.743073, Acc_avg:58.75% Training_loss_avg:0.675892
Epoch:1 Step:784 Training_loss:0.646999, Acc_avg:59.25% Training_loss_avg:0.674482
Epoch:1 Step:792 Training_loss:0.595444, Acc_avg:59.00% Training_loss_avg:0.674393
Epoch:1 Step:800 Training_loss:0.677523, Acc_avg:59.50% Training_loss_avg:0.672820
Epoch:1 Step:808 Training_loss:0.584278, Acc_avg:59.25% Training_loss_avg:0.672659
Epoch:1 Step:816 Training_loss:0.595277, Acc_avg:59.75% Training_loss_avg:0.670469
Epoch:1 Step:824 Training_loss:0.579292, Acc_avg:60.25% Training_loss_avg:0.667339
Epoch:1 Step:832 Training_loss:0.629330, Acc_avg:60.50% Training_loss_avg:0.666346
Epoch:1 Step:840 Training_loss:0

52it [00:07,  6.60it/s]


Epoch:1 Step:992 Val_loss:0.681077, Val_Acc_avg:59.75%
Epoch:1 Step:1000 Training_loss:0.637839, Acc_avg:59.75% Training_loss_avg:0.659596
Epoch:1 Step:1008 Training_loss:0.607488, Acc_avg:60.00% Training_loss_avg:0.658264
Epoch:1 Step:1016 Training_loss:0.730214, Acc_avg:59.50% Training_loss_avg:0.661206
Epoch:1 Step:1024 Training_loss:0.602393, Acc_avg:59.75% Training_loss_avg:0.659298
Epoch:1 Step:1032 Training_loss:0.711784, Acc_avg:59.50% Training_loss_avg:0.659176
Epoch:1 Step:1040 Training_loss:0.728289, Acc_avg:58.75% Training_loss_avg:0.660608
Epoch:1 Step:1048 Training_loss:0.645994, Acc_avg:58.50% Training_loss_avg:0.660589
Epoch:1 Step:1056 Training_loss:0.661451, Acc_avg:59.00% Training_loss_avg:0.659703
Epoch:1 Step:1064 Training_loss:0.627489, Acc_avg:60.00% Training_loss_avg:0.657780
Epoch:1 Step:1072 Training_loss:0.629365, Acc_avg:60.25% Training_loss_avg:0.656692
Epoch:1 Step:1080 Training_loss:0.605260, Acc_avg:61.00% Training_loss_avg:0.654394
Epoch:1 Step:1088 Tra

52it [00:07,  6.60it/s]


Epoch:1 Step:1240 Val_loss:0.666281, Val_Acc_avg:58.00%
Epoch:1 Step:1248 Training_loss:0.737162, Acc_avg:57.75% Training_loss_avg:0.667304
Epoch:1 Step:1256 Training_loss:0.716993, Acc_avg:57.75% Training_loss_avg:0.668557
Epoch:1 Step:1264 Training_loss:0.535098, Acc_avg:58.00% Training_loss_avg:0.668144
Epoch:1 Step:1272 Training_loss:0.666048, Acc_avg:57.75% Training_loss_avg:0.671494
Epoch:1 Step:1280 Training_loss:0.760799, Acc_avg:56.75% Training_loss_avg:0.677730
Epoch:1 Step:1288 Training_loss:0.592904, Acc_avg:56.75% Training_loss_avg:0.678470
Epoch:1 Step:1296 Training_loss:0.777362, Acc_avg:57.00% Training_loss_avg:0.677141
Epoch:1 Step:1304 Training_loss:0.743022, Acc_avg:57.00% Training_loss_avg:0.676838
Epoch:1 Step:1312 Training_loss:0.587660, Acc_avg:57.00% Training_loss_avg:0.678509
Epoch:1 Step:1320 Training_loss:0.591237, Acc_avg:57.50% Training_loss_avg:0.673651
Epoch:1 Step:1328 Training_loss:0.630306, Acc_avg:57.75% Training_loss_avg:0.670180
Epoch:1 Step:1336 Tr

52it [00:07,  6.60it/s]


Epoch:1 Step:1488 Val_loss:0.666107, Val_Acc_avg:58.50%
Epoch:1 Step:1496 Training_loss:0.579587, Acc_avg:57.25% Training_loss_avg:0.666752
Epoch:1 Step:1504 Training_loss:0.771049, Acc_avg:57.25% Training_loss_avg:0.668129
Epoch:1 Step:1512 Training_loss:0.686533, Acc_avg:57.75% Training_loss_avg:0.666766
Epoch:1 Step:1520 Training_loss:0.637613, Acc_avg:57.50% Training_loss_avg:0.666295
Epoch:1 Step:1528 Training_loss:0.706489, Acc_avg:57.25% Training_loss_avg:0.666420
Epoch:1 Step:1536 Training_loss:0.739384, Acc_avg:56.75% Training_loss_avg:0.668312
Epoch:1 Step:1544 Training_loss:0.682939, Acc_avg:56.25% Training_loss_avg:0.670383
Epoch:1 Step:1552 Training_loss:0.606407, Acc_avg:56.50% Training_loss_avg:0.669636
Epoch:1 Step:1560 Training_loss:0.619298, Acc_avg:57.00% Training_loss_avg:0.668527
Epoch:1 Step:1568 Training_loss:0.658163, Acc_avg:57.50% Training_loss_avg:0.668478
Epoch:1 Step:1576 Training_loss:0.612976, Acc_avg:58.00% Training_loss_avg:0.667364
Epoch:1 Step:1584 Tr

52it [00:07,  6.60it/s]


Epoch:1 Step:1736 Val_loss:0.681566, Val_Acc_avg:59.75%
Epoch:1 Step:1744 Training_loss:0.692629, Acc_avg:57.50% Training_loss_avg:0.662843
Epoch:1 Step:1752 Training_loss:0.494797, Acc_avg:58.00% Training_loss_avg:0.659622
Epoch:1 Step:1760 Training_loss:0.770876, Acc_avg:58.25% Training_loss_avg:0.659829
Epoch:1 Step:1768 Training_loss:0.663996, Acc_avg:58.25% Training_loss_avg:0.661123
Epoch:1 Step:1776 Training_loss:0.775358, Acc_avg:58.25% Training_loss_avg:0.663065
Epoch:1 Step:1784 Training_loss:0.680923, Acc_avg:57.25% Training_loss_avg:0.667215
Epoch:1 Step:1792 Training_loss:0.667128, Acc_avg:57.25% Training_loss_avg:0.668003
Epoch:1 Step:1800 Training_loss:0.807781, Acc_avg:57.50% Training_loss_avg:0.666776
Epoch:1 Step:1808 Training_loss:0.623720, Acc_avg:58.25% Training_loss_avg:0.665171
Epoch:1 Step:1816 Training_loss:0.731053, Acc_avg:58.00% Training_loss_avg:0.665376
Epoch:1 Step:1824 Training_loss:0.648864, Acc_avg:58.25% Training_loss_avg:0.665563
Epoch:1 Step:1832 Tr

52it [00:07,  6.60it/s]


Epoch:1 Step:1984 Val_loss:0.665931, Val_Acc_avg:58.00%
Epoch:1 Step:1992 Training_loss:0.693923, Acc_avg:55.00% Training_loss_avg:0.680498
Epoch:1 Step:2000 Training_loss:0.634953, Acc_avg:55.25% Training_loss_avg:0.680083
Epoch:1 Step:2008 Training_loss:0.645840, Acc_avg:55.50% Training_loss_avg:0.678182
Epoch:1 Step:2016 Training_loss:0.669753, Acc_avg:55.50% Training_loss_avg:0.678852
Epoch:1 Step:2024 Training_loss:0.701762, Acc_avg:55.50% Training_loss_avg:0.679014
Epoch:1 Step:2032 Training_loss:0.583554, Acc_avg:55.25% Training_loss_avg:0.678181
Epoch:1 Step:2040 Training_loss:0.615595, Acc_avg:55.50% Training_loss_avg:0.677726
Epoch:1 Step:2048 Training_loss:0.619647, Acc_avg:56.00% Training_loss_avg:0.676426
Epoch:1 Step:2056 Training_loss:0.588444, Acc_avg:55.25% Training_loss_avg:0.677345
Epoch:1 Step:2064 Training_loss:0.647196, Acc_avg:56.25% Training_loss_avg:0.674440
Epoch:1 Step:2072 Training_loss:0.624019, Acc_avg:55.75% Training_loss_avg:0.675627
Epoch:1 Step:2080 Tr

52it [00:07,  6.60it/s]


Epoch:1 Step:2232 Val_loss:0.693233, Val_Acc_avg:59.75%
Epoch:1 Step:2240 Training_loss:0.511159, Acc_avg:60.00% Training_loss_avg:0.649947
Epoch:1 Step:2248 Training_loss:0.645385, Acc_avg:60.50% Training_loss_avg:0.650130
Epoch:1 Step:2256 Training_loss:0.524448, Acc_avg:60.50% Training_loss_avg:0.647469
Epoch:1 Step:2264 Training_loss:0.604889, Acc_avg:60.25% Training_loss_avg:0.646992
Epoch:1 Step:2272 Training_loss:0.627967, Acc_avg:61.25% Training_loss_avg:0.644168
Epoch:1 Step:2280 Training_loss:0.566275, Acc_avg:62.00% Training_loss_avg:0.640854
Epoch:1 Step:2288 Training_loss:0.571339, Acc_avg:62.50% Training_loss_avg:0.638983
Epoch:1 Step:2296 Training_loss:0.790173, Acc_avg:62.25% Training_loss_avg:0.642588
Epoch:1 Step:2304 Training_loss:0.613824, Acc_avg:62.25% Training_loss_avg:0.641368
Epoch:1 Step:2312 Training_loss:0.632315, Acc_avg:62.75% Training_loss_avg:0.639690
Epoch:1 Step:2320 Training_loss:0.931824, Acc_avg:62.25% Training_loss_avg:0.646329
Epoch:1 Step:2328 Tr

52it [00:07,  6.60it/s]


Epoch:1 Step:2480 Val_loss:0.656097, Val_Acc_avg:56.75%
Epoch:1 Step:2488 Training_loss:0.605757, Acc_avg:63.50% Training_loss_avg:0.651706
Epoch:1 Step:2496 Training_loss:0.547897, Acc_avg:64.50% Training_loss_avg:0.647955
Epoch:1 Step:2504 Training_loss:0.782521, Acc_avg:64.00% Training_loss_avg:0.650299
Epoch:1 Step:2512 Training_loss:0.665448, Acc_avg:64.00% Training_loss_avg:0.649183
Epoch:1 Step:2520 Training_loss:0.701298, Acc_avg:63.25% Training_loss_avg:0.653252
Epoch:1 Step:2528 Training_loss:0.675938, Acc_avg:62.25% Training_loss_avg:0.656067
Epoch:1 Step:2536 Training_loss:0.677383, Acc_avg:62.25% Training_loss_avg:0.658428
Epoch:1 Step:2544 Training_loss:0.698795, Acc_avg:62.25% Training_loss_avg:0.657229
Epoch:1 Step:2552 Training_loss:0.711862, Acc_avg:61.75% Training_loss_avg:0.659635
Epoch:1 Step:2560 Training_loss:0.689293, Acc_avg:61.75% Training_loss_avg:0.660936
Epoch:1 Step:2568 Training_loss:0.679822, Acc_avg:60.75% Training_loss_avg:0.664491
Epoch:1 Step:2576 Tr

52it [00:07,  6.60it/s]


Epoch:1 Step:2728 Val_loss:0.653327, Val_Acc_avg:60.75%
Epoch:1 Step:2736 Training_loss:0.757144, Acc_avg:58.25% Training_loss_avg:0.667734
Epoch:1 Step:2744 Training_loss:0.771574, Acc_avg:58.25% Training_loss_avg:0.666603
Epoch:1 Step:2752 Training_loss:0.587041, Acc_avg:59.00% Training_loss_avg:0.663410
Epoch:1 Step:2760 Training_loss:0.597654, Acc_avg:59.25% Training_loss_avg:0.662840
Epoch:1 Step:2768 Training_loss:0.748396, Acc_avg:59.00% Training_loss_avg:0.664560
Epoch:1 Step:2776 Training_loss:0.630670, Acc_avg:58.75% Training_loss_avg:0.664572
Epoch:1 Step:2784 Training_loss:0.610110, Acc_avg:59.00% Training_loss_avg:0.664586
Epoch:1 Step:2792 Training_loss:0.760507, Acc_avg:58.75% Training_loss_avg:0.665398
Epoch:1 Step:2800 Training_loss:0.652517, Acc_avg:58.50% Training_loss_avg:0.665936
Epoch:1 Step:2808 Training_loss:0.615098, Acc_avg:58.50% Training_loss_avg:0.664847
Epoch:1 Step:2816 Training_loss:0.544375, Acc_avg:59.25% Training_loss_avg:0.660127
Epoch:1 Step:2824 Tr

52it [00:07,  6.60it/s]


Epoch:1 Step:2976 Val_loss:0.692052, Val_Acc_avg:60.00%
Epoch:1 Step:2984 Training_loss:0.587194, Acc_avg:64.75% Training_loss_avg:0.624945
Epoch:1 Step:2992 Training_loss:0.716496, Acc_avg:65.00% Training_loss_avg:0.624627
Epoch:1 Step:3000 Training_loss:0.636012, Acc_avg:64.75% Training_loss_avg:0.625708
Epoch:1 Step:3008 Training_loss:0.647125, Acc_avg:65.00% Training_loss_avg:0.625589
Epoch:1 Step:3016 Training_loss:0.498769, Acc_avg:65.25% Training_loss_avg:0.623769
Epoch:1 Step:3024 Training_loss:0.716318, Acc_avg:65.50% Training_loss_avg:0.623764
Epoch:1 Step:3032 Training_loss:0.634749, Acc_avg:65.25% Training_loss_avg:0.622897
Epoch:1 Step:3040 Training_loss:0.783237, Acc_avg:65.50% Training_loss_avg:0.623269
Epoch:1 Step:3048 Training_loss:0.699315, Acc_avg:65.50% Training_loss_avg:0.624209
Epoch:1 Step:3056 Training_loss:0.609601, Acc_avg:65.50% Training_loss_avg:0.622550
Epoch:1 Step:3064 Training_loss:0.596109, Acc_avg:65.50% Training_loss_avg:0.621625
Epoch:1 Step:3072 Tr

52it [00:07,  6.60it/s]


Epoch:1 Step:3224 Val_loss:0.664806, Val_Acc_avg:56.00%
Epoch:1 Step:3232 Training_loss:0.550112, Acc_avg:62.00% Training_loss_avg:0.631658
Epoch:1 Step:3240 Training_loss:0.543758, Acc_avg:62.00% Training_loss_avg:0.629409
Epoch:1 Step:3248 Training_loss:0.620171, Acc_avg:61.50% Training_loss_avg:0.631860
Epoch:1 Step:3256 Training_loss:0.521056, Acc_avg:61.75% Training_loss_avg:0.630277
Epoch:1 Step:3264 Training_loss:0.837101, Acc_avg:60.50% Training_loss_avg:0.638196
Epoch:1 Step:3272 Training_loss:0.627267, Acc_avg:60.25% Training_loss_avg:0.640332
Epoch:1 Step:3280 Training_loss:0.722407, Acc_avg:59.50% Training_loss_avg:0.644383
Epoch:1 Step:3288 Training_loss:0.575186, Acc_avg:59.75% Training_loss_avg:0.646203
Epoch:1 Step:3296 Training_loss:0.556792, Acc_avg:59.50% Training_loss_avg:0.647208
Epoch:1 Step:3304 Training_loss:0.611054, Acc_avg:60.00% Training_loss_avg:0.645728
Epoch:1 Step:3312 Training_loss:0.582678, Acc_avg:60.00% Training_loss_avg:0.644804
Epoch:1 Step:3320 Tr

52it [00:07,  6.60it/s]


Epoch:2 Step:0 Val_loss:0.653850, Val_Acc_avg:60.50%
Epoch:2 Step:8 Training_loss:0.609523, Acc_avg:60.33% Training_loss_avg:0.638544
Epoch:2 Step:16 Training_loss:0.565341, Acc_avg:60.33% Training_loss_avg:0.636466
Epoch:2 Step:24 Training_loss:0.626006, Acc_avg:60.08% Training_loss_avg:0.639233
Epoch:2 Step:32 Training_loss:0.703257, Acc_avg:59.83% Training_loss_avg:0.641627
Epoch:2 Step:40 Training_loss:0.689941, Acc_avg:59.58% Training_loss_avg:0.643962
Epoch:2 Step:48 Training_loss:0.485879, Acc_avg:60.08% Training_loss_avg:0.641936
Epoch:2 Step:56 Training_loss:0.649927, Acc_avg:60.33% Training_loss_avg:0.640605
Epoch:2 Step:64 Training_loss:0.608351, Acc_avg:60.33% Training_loss_avg:0.640052
Epoch:2 Step:72 Training_loss:0.672330, Acc_avg:60.08% Training_loss_avg:0.640556
Epoch:2 Step:80 Training_loss:0.601109, Acc_avg:59.83% Training_loss_avg:0.642602
Epoch:2 Step:88 Training_loss:0.511503, Acc_avg:60.33% Training_loss_avg:0.638506
Epoch:2 Step:96 Training_loss:0.659899, Acc_av

52it [00:07,  6.60it/s]


Epoch:2 Step:248 Val_loss:0.679047, Val_Acc_avg:60.75%
Epoch:2 Step:256 Training_loss:0.528070, Acc_avg:65.58% Training_loss_avg:0.609637
Epoch:2 Step:264 Training_loss:0.689963, Acc_avg:65.83% Training_loss_avg:0.608908
Epoch:2 Step:272 Training_loss:0.739817, Acc_avg:66.08% Training_loss_avg:0.611001
Epoch:2 Step:280 Training_loss:0.530752, Acc_avg:66.33% Training_loss_avg:0.609678
Epoch:2 Step:288 Training_loss:0.669426, Acc_avg:66.08% Training_loss_avg:0.608556
Epoch:2 Step:296 Training_loss:0.661072, Acc_avg:66.08% Training_loss_avg:0.610775
Epoch:2 Step:304 Training_loss:0.727883, Acc_avg:65.83% Training_loss_avg:0.614458
Epoch:2 Step:312 Training_loss:0.747449, Acc_avg:65.58% Training_loss_avg:0.617003
Epoch:2 Step:320 Training_loss:0.601140, Acc_avg:65.08% Training_loss_avg:0.618605
Epoch:2 Step:328 Training_loss:0.718421, Acc_avg:65.83% Training_loss_avg:0.616231
Epoch:2 Step:336 Training_loss:0.722990, Acc_avg:65.33% Training_loss_avg:0.618146
Epoch:2 Step:344 Training_loss:0

52it [00:07,  6.60it/s]


Epoch:2 Step:496 Val_loss:0.652004, Val_Acc_avg:60.50%
Epoch:2 Step:504 Training_loss:0.760251, Acc_avg:64.75% Training_loss_avg:0.627130
Epoch:2 Step:512 Training_loss:0.740034, Acc_avg:63.75% Training_loss_avg:0.632226
Epoch:2 Step:520 Training_loss:0.562007, Acc_avg:64.00% Training_loss_avg:0.630364
Epoch:2 Step:528 Training_loss:0.583431, Acc_avg:63.75% Training_loss_avg:0.631673
Epoch:2 Step:536 Training_loss:0.480878, Acc_avg:64.00% Training_loss_avg:0.628417
Epoch:2 Step:544 Training_loss:0.629022, Acc_avg:63.50% Training_loss_avg:0.629665
Epoch:2 Step:552 Training_loss:0.618357, Acc_avg:63.00% Training_loss_avg:0.631711
Epoch:2 Step:560 Training_loss:0.921577, Acc_avg:62.00% Training_loss_avg:0.638453
Epoch:2 Step:568 Training_loss:0.670356, Acc_avg:62.25% Training_loss_avg:0.640075
Epoch:2 Step:576 Training_loss:0.525833, Acc_avg:62.25% Training_loss_avg:0.635446
Epoch:2 Step:584 Training_loss:0.641342, Acc_avg:62.50% Training_loss_avg:0.636330
Epoch:2 Step:592 Training_loss:0

52it [00:07,  6.60it/s]


Epoch:2 Step:744 Val_loss:0.693849, Val_Acc_avg:60.00%
Epoch:2 Step:752 Training_loss:0.383269, Acc_avg:61.50% Training_loss_avg:0.637471
Epoch:2 Step:760 Training_loss:0.680803, Acc_avg:61.00% Training_loss_avg:0.639787
Epoch:2 Step:768 Training_loss:0.861512, Acc_avg:60.50% Training_loss_avg:0.645093
Epoch:2 Step:776 Training_loss:0.513983, Acc_avg:61.50% Training_loss_avg:0.640565
Epoch:2 Step:784 Training_loss:0.626837, Acc_avg:61.25% Training_loss_avg:0.639767
Epoch:2 Step:792 Training_loss:0.660517, Acc_avg:61.25% Training_loss_avg:0.639907
Epoch:2 Step:800 Training_loss:0.825591, Acc_avg:61.25% Training_loss_avg:0.642494
Epoch:2 Step:808 Training_loss:0.850293, Acc_avg:60.50% Training_loss_avg:0.648863
Epoch:2 Step:816 Training_loss:0.902911, Acc_avg:59.25% Training_loss_avg:0.658079
Epoch:2 Step:824 Training_loss:0.670559, Acc_avg:59.25% Training_loss_avg:0.657644
Epoch:2 Step:832 Training_loss:0.555685, Acc_avg:59.50% Training_loss_avg:0.655428
Epoch:2 Step:840 Training_loss:0

52it [00:07,  6.60it/s]


Epoch:2 Step:992 Val_loss:0.646221, Val_Acc_avg:59.75%
Epoch:2 Step:1000 Training_loss:0.725115, Acc_avg:61.75% Training_loss_avg:0.642463
Epoch:2 Step:1008 Training_loss:0.588359, Acc_avg:62.75% Training_loss_avg:0.634474
Epoch:2 Step:1016 Training_loss:0.580184, Acc_avg:63.50% Training_loss_avg:0.631821
Epoch:2 Step:1024 Training_loss:0.470521, Acc_avg:64.00% Training_loss_avg:0.631060
Epoch:2 Step:1032 Training_loss:0.525551, Acc_avg:64.75% Training_loss_avg:0.626797
Epoch:2 Step:1040 Training_loss:0.787038, Acc_avg:64.25% Training_loss_avg:0.629329
Epoch:2 Step:1048 Training_loss:0.650972, Acc_avg:64.00% Training_loss_avg:0.629154
Epoch:2 Step:1056 Training_loss:0.642042, Acc_avg:64.50% Training_loss_avg:0.629546
Epoch:2 Step:1064 Training_loss:0.704925, Acc_avg:63.75% Training_loss_avg:0.630924
Epoch:2 Step:1072 Training_loss:0.672576, Acc_avg:63.50% Training_loss_avg:0.633758
Epoch:2 Step:1080 Training_loss:0.538650, Acc_avg:64.00% Training_loss_avg:0.632311
Epoch:2 Step:1088 Tra

52it [00:07,  6.60it/s]


Epoch:2 Step:1240 Val_loss:0.646013, Val_Acc_avg:59.75%
Epoch:2 Step:1248 Training_loss:0.687837, Acc_avg:63.00% Training_loss_avg:0.626551
Epoch:2 Step:1256 Training_loss:0.697433, Acc_avg:63.00% Training_loss_avg:0.627825
Epoch:2 Step:1264 Training_loss:0.624025, Acc_avg:62.50% Training_loss_avg:0.630088
Epoch:2 Step:1272 Training_loss:0.486982, Acc_avg:62.50% Training_loss_avg:0.629625
Epoch:2 Step:1280 Training_loss:0.447601, Acc_avg:62.75% Training_loss_avg:0.629169
Epoch:2 Step:1288 Training_loss:0.760543, Acc_avg:62.00% Training_loss_avg:0.632811
Epoch:2 Step:1296 Training_loss:0.648128, Acc_avg:62.25% Training_loss_avg:0.632902
Epoch:2 Step:1304 Training_loss:0.496775, Acc_avg:63.00% Training_loss_avg:0.630559
Epoch:2 Step:1312 Training_loss:0.726602, Acc_avg:63.50% Training_loss_avg:0.631062
Epoch:2 Step:1320 Training_loss:0.718044, Acc_avg:63.25% Training_loss_avg:0.633909
Epoch:2 Step:1328 Training_loss:0.548269, Acc_avg:63.25% Training_loss_avg:0.633036
Epoch:2 Step:1336 Tr

52it [00:07,  6.60it/s]


Epoch:2 Step:1488 Val_loss:0.646077, Val_Acc_avg:60.25%
Epoch:2 Step:1496 Training_loss:0.705111, Acc_avg:60.75% Training_loss_avg:0.639885
Epoch:2 Step:1504 Training_loss:0.576497, Acc_avg:60.50% Training_loss_avg:0.640262
Epoch:2 Step:1512 Training_loss:0.809423, Acc_avg:59.75% Training_loss_avg:0.647397
Epoch:2 Step:1520 Training_loss:0.583071, Acc_avg:59.25% Training_loss_avg:0.649173
Epoch:2 Step:1528 Training_loss:0.664435, Acc_avg:59.00% Training_loss_avg:0.650128
Epoch:2 Step:1536 Training_loss:0.684346, Acc_avg:58.75% Training_loss_avg:0.649555
Epoch:2 Step:1544 Training_loss:0.564959, Acc_avg:58.75% Training_loss_avg:0.649316
Epoch:2 Step:1552 Training_loss:0.667263, Acc_avg:59.50% Training_loss_avg:0.647046
Epoch:2 Step:1560 Training_loss:0.737820, Acc_avg:60.00% Training_loss_avg:0.645526
Epoch:2 Step:1568 Training_loss:0.602280, Acc_avg:59.75% Training_loss_avg:0.647860
Epoch:2 Step:1576 Training_loss:0.593924, Acc_avg:60.50% Training_loss_avg:0.643231
Epoch:2 Step:1584 Tr

52it [00:07,  6.60it/s]


Epoch:2 Step:1736 Val_loss:0.644988, Val_Acc_avg:60.00%
Epoch:2 Step:1744 Training_loss:0.673650, Acc_avg:59.75% Training_loss_avg:0.654851
Epoch:2 Step:1752 Training_loss:0.690324, Acc_avg:59.75% Training_loss_avg:0.657756
Epoch:2 Step:1760 Training_loss:0.638799, Acc_avg:59.25% Training_loss_avg:0.658060
Epoch:2 Step:1768 Training_loss:0.503426, Acc_avg:59.50% Training_loss_avg:0.656744
Epoch:2 Step:1776 Training_loss:0.671726, Acc_avg:59.75% Training_loss_avg:0.653452
Epoch:2 Step:1784 Training_loss:0.687762, Acc_avg:59.25% Training_loss_avg:0.657929
Epoch:2 Step:1792 Training_loss:0.672505, Acc_avg:58.75% Training_loss_avg:0.660515
Epoch:2 Step:1800 Training_loss:0.578446, Acc_avg:59.00% Training_loss_avg:0.657617
Epoch:2 Step:1808 Training_loss:0.655396, Acc_avg:59.00% Training_loss_avg:0.656472
Epoch:2 Step:1816 Training_loss:0.589294, Acc_avg:59.75% Training_loss_avg:0.650909
Epoch:2 Step:1824 Training_loss:0.778402, Acc_avg:60.25% Training_loss_avg:0.649799
Epoch:2 Step:1832 Tr

52it [00:07,  6.60it/s]


Epoch:2 Step:1984 Val_loss:0.651575, Val_Acc_avg:60.50%
Epoch:2 Step:1992 Training_loss:0.737912, Acc_avg:60.75% Training_loss_avg:0.649279
Epoch:2 Step:2000 Training_loss:0.665001, Acc_avg:60.75% Training_loss_avg:0.649397
Epoch:2 Step:2008 Training_loss:0.858973, Acc_avg:59.50% Training_loss_avg:0.655583
Epoch:2 Step:2016 Training_loss:0.620679, Acc_avg:59.75% Training_loss_avg:0.654954
Epoch:2 Step:2024 Training_loss:0.494537, Acc_avg:60.25% Training_loss_avg:0.651992
Epoch:2 Step:2032 Training_loss:0.596371, Acc_avg:59.50% Training_loss_avg:0.653873
Epoch:2 Step:2040 Training_loss:0.816856, Acc_avg:59.25% Training_loss_avg:0.657325
Epoch:2 Step:2048 Training_loss:0.603430, Acc_avg:59.50% Training_loss_avg:0.656142
Epoch:2 Step:2056 Training_loss:0.537244, Acc_avg:60.50% Training_loss_avg:0.651709
Epoch:2 Step:2064 Training_loss:0.514157, Acc_avg:60.75% Training_loss_avg:0.648732
Epoch:2 Step:2072 Training_loss:0.760137, Acc_avg:60.25% Training_loss_avg:0.650279
Epoch:2 Step:2080 Tr

52it [00:07,  6.60it/s]


Epoch:2 Step:2232 Val_loss:0.668137, Val_Acc_avg:60.00%
Epoch:2 Step:2240 Training_loss:0.457149, Acc_avg:63.75% Training_loss_avg:0.618551
Epoch:2 Step:2248 Training_loss:0.559048, Acc_avg:63.50% Training_loss_avg:0.619197
Epoch:2 Step:2256 Training_loss:0.822902, Acc_avg:63.00% Training_loss_avg:0.623467
Epoch:2 Step:2264 Training_loss:0.813371, Acc_avg:62.50% Training_loss_avg:0.628285
Epoch:2 Step:2272 Training_loss:0.658633, Acc_avg:62.50% Training_loss_avg:0.627649
Epoch:2 Step:2280 Training_loss:0.532062, Acc_avg:63.00% Training_loss_avg:0.626015
Epoch:2 Step:2288 Training_loss:0.768335, Acc_avg:62.00% Training_loss_avg:0.633799
Epoch:2 Step:2296 Training_loss:0.604104, Acc_avg:62.25% Training_loss_avg:0.634904
Epoch:2 Step:2304 Training_loss:0.629631, Acc_avg:62.00% Training_loss_avg:0.635082
Epoch:2 Step:2312 Training_loss:0.701613, Acc_avg:61.75% Training_loss_avg:0.636978
Epoch:2 Step:2320 Training_loss:0.637420, Acc_avg:62.25% Training_loss_avg:0.633594
Epoch:2 Step:2328 Tr

52it [00:07,  6.60it/s]


Epoch:2 Step:2480 Val_loss:0.646461, Val_Acc_avg:60.25%
Epoch:2 Step:2488 Training_loss:0.586532, Acc_avg:63.25% Training_loss_avg:0.623870
Epoch:2 Step:2496 Training_loss:0.635683, Acc_avg:63.50% Training_loss_avg:0.624405
Epoch:2 Step:2504 Training_loss:0.573926, Acc_avg:63.00% Training_loss_avg:0.624888
Epoch:2 Step:2512 Training_loss:0.525985, Acc_avg:63.50% Training_loss_avg:0.623123
Epoch:2 Step:2520 Training_loss:0.589523, Acc_avg:64.25% Training_loss_avg:0.617941
Epoch:2 Step:2528 Training_loss:0.671048, Acc_avg:63.75% Training_loss_avg:0.621130
Epoch:2 Step:2536 Training_loss:0.742790, Acc_avg:64.00% Training_loss_avg:0.621719
Epoch:2 Step:2544 Training_loss:0.467795, Acc_avg:64.50% Training_loss_avg:0.618084
Epoch:2 Step:2552 Training_loss:0.721627, Acc_avg:63.50% Training_loss_avg:0.623565
Epoch:2 Step:2560 Training_loss:0.702161, Acc_avg:63.00% Training_loss_avg:0.629638
Epoch:2 Step:2568 Training_loss:0.610126, Acc_avg:62.50% Training_loss_avg:0.632235
Epoch:2 Step:2576 Tr

52it [00:07,  6.60it/s]


Epoch:2 Step:2728 Val_loss:0.652557, Val_Acc_avg:61.25%
Epoch:2 Step:2736 Training_loss:0.515097, Acc_avg:65.25% Training_loss_avg:0.626270
Epoch:2 Step:2744 Training_loss:0.638296, Acc_avg:65.50% Training_loss_avg:0.625435
Epoch:2 Step:2752 Training_loss:0.539656, Acc_avg:65.25% Training_loss_avg:0.623565
Epoch:2 Step:2760 Training_loss:0.802491, Acc_avg:65.00% Training_loss_avg:0.628670
Epoch:2 Step:2768 Training_loss:0.780052, Acc_avg:63.50% Training_loss_avg:0.633192
Epoch:2 Step:2776 Training_loss:0.425598, Acc_avg:63.75% Training_loss_avg:0.628269
Epoch:2 Step:2784 Training_loss:0.528071, Acc_avg:64.25% Training_loss_avg:0.626175
Epoch:2 Step:2792 Training_loss:0.531609, Acc_avg:65.50% Training_loss_avg:0.621831
Epoch:2 Step:2800 Training_loss:0.644462, Acc_avg:66.00% Training_loss_avg:0.619964
Epoch:2 Step:2808 Training_loss:0.609522, Acc_avg:66.00% Training_loss_avg:0.616637
Epoch:2 Step:2816 Training_loss:0.600213, Acc_avg:66.25% Training_loss_avg:0.616047
Epoch:2 Step:2824 Tr

52it [00:07,  6.60it/s]


Epoch:2 Step:2976 Val_loss:0.655568, Val_Acc_avg:61.25%
Epoch:2 Step:2984 Training_loss:0.580378, Acc_avg:66.00% Training_loss_avg:0.625638
Epoch:2 Step:2992 Training_loss:0.526608, Acc_avg:66.00% Training_loss_avg:0.623186
Epoch:2 Step:3000 Training_loss:0.743780, Acc_avg:65.50% Training_loss_avg:0.625414
Epoch:2 Step:3008 Training_loss:0.513699, Acc_avg:66.00% Training_loss_avg:0.624195
Epoch:2 Step:3016 Training_loss:0.707132, Acc_avg:65.50% Training_loss_avg:0.627423
Epoch:2 Step:3024 Training_loss:0.681807, Acc_avg:65.00% Training_loss_avg:0.630535
Epoch:2 Step:3032 Training_loss:0.526624, Acc_avg:65.25% Training_loss_avg:0.628258
Epoch:2 Step:3040 Training_loss:0.534876, Acc_avg:65.25% Training_loss_avg:0.627690
Epoch:2 Step:3048 Training_loss:0.610555, Acc_avg:65.50% Training_loss_avg:0.624708
Epoch:2 Step:3056 Training_loss:0.750335, Acc_avg:64.75% Training_loss_avg:0.629803
Epoch:2 Step:3064 Training_loss:0.482930, Acc_avg:65.25% Training_loss_avg:0.623549
Epoch:2 Step:3072 Tr

52it [00:07,  6.60it/s]


Epoch:2 Step:3224 Val_loss:0.701490, Val_Acc_avg:60.25%
Epoch:2 Step:3232 Training_loss:0.576121, Acc_avg:63.50% Training_loss_avg:0.638115
Epoch:2 Step:3240 Training_loss:0.572439, Acc_avg:64.00% Training_loss_avg:0.636868
Epoch:2 Step:3248 Training_loss:0.786263, Acc_avg:63.75% Training_loss_avg:0.640359
Epoch:2 Step:3256 Training_loss:0.565479, Acc_avg:63.50% Training_loss_avg:0.639917
Epoch:2 Step:3264 Training_loss:0.576189, Acc_avg:63.75% Training_loss_avg:0.637162
Epoch:2 Step:3272 Training_loss:0.470637, Acc_avg:63.75% Training_loss_avg:0.635693
Epoch:2 Step:3280 Training_loss:0.638676, Acc_avg:63.25% Training_loss_avg:0.640027
Epoch:2 Step:3288 Training_loss:0.786224, Acc_avg:62.75% Training_loss_avg:0.643721
Epoch:2 Step:3296 Training_loss:0.605243, Acc_avg:63.00% Training_loss_avg:0.643956
Epoch:2 Step:3304 Training_loss:0.773021, Acc_avg:62.75% Training_loss_avg:0.647594
Epoch:2 Step:3312 Training_loss:0.612661, Acc_avg:62.25% Training_loss_avg:0.650501
Epoch:2 Step:3320 Tr

52it [00:07,  6.60it/s]


Epoch:3 Step:0 Val_loss:0.662966, Val_Acc_avg:60.75%
Epoch:3 Step:8 Training_loss:0.551721, Acc_avg:65.25% Training_loss_avg:0.627404
Epoch:3 Step:16 Training_loss:0.776512, Acc_avg:64.75% Training_loss_avg:0.631066
Epoch:3 Step:24 Training_loss:0.759401, Acc_avg:65.00% Training_loss_avg:0.634061
Epoch:3 Step:32 Training_loss:0.737586, Acc_avg:64.75% Training_loss_avg:0.633750
Epoch:3 Step:40 Training_loss:0.626158, Acc_avg:64.75% Training_loss_avg:0.630731
Epoch:3 Step:48 Training_loss:0.605868, Acc_avg:64.00% Training_loss_avg:0.631241
Epoch:3 Step:56 Training_loss:0.509734, Acc_avg:64.25% Training_loss_avg:0.630904
Epoch:3 Step:64 Training_loss:0.498937, Acc_avg:64.75% Training_loss_avg:0.626007
Epoch:3 Step:72 Training_loss:0.758410, Acc_avg:64.00% Training_loss_avg:0.630901
Epoch:3 Step:80 Training_loss:0.795486, Acc_avg:63.75% Training_loss_avg:0.632668
Epoch:3 Step:88 Training_loss:0.501651, Acc_avg:64.00% Training_loss_avg:0.629065
Epoch:3 Step:96 Training_loss:0.582836, Acc_av

52it [00:07,  6.60it/s]


Epoch:3 Step:248 Val_loss:0.648301, Val_Acc_avg:61.25%
Epoch:3 Step:256 Training_loss:0.570903, Acc_avg:64.75% Training_loss_avg:0.618178
Epoch:3 Step:264 Training_loss:0.701904, Acc_avg:64.00% Training_loss_avg:0.619224
Epoch:3 Step:272 Training_loss:0.571789, Acc_avg:63.75% Training_loss_avg:0.619305
Epoch:3 Step:280 Training_loss:0.693992, Acc_avg:63.75% Training_loss_avg:0.615517
Epoch:3 Step:288 Training_loss:0.553100, Acc_avg:63.50% Training_loss_avg:0.615354
Epoch:3 Step:296 Training_loss:0.714022, Acc_avg:63.00% Training_loss_avg:0.618112
Epoch:3 Step:304 Training_loss:0.862109, Acc_avg:62.25% Training_loss_avg:0.623905
Epoch:3 Step:312 Training_loss:0.523316, Acc_avg:62.50% Training_loss_avg:0.618646
Epoch:3 Step:320 Training_loss:0.601437, Acc_avg:62.75% Training_loss_avg:0.619365
Epoch:3 Step:328 Training_loss:0.772859, Acc_avg:62.50% Training_loss_avg:0.623299
Epoch:3 Step:336 Training_loss:0.566912, Acc_avg:62.25% Training_loss_avg:0.625224
Epoch:3 Step:344 Training_loss:0

52it [00:07,  6.60it/s]


Epoch:3 Step:496 Val_loss:0.665067, Val_Acc_avg:61.00%
Epoch:3 Step:504 Training_loss:0.643540, Acc_avg:61.00% Training_loss_avg:0.628648
Epoch:3 Step:512 Training_loss:0.535175, Acc_avg:61.00% Training_loss_avg:0.627163
Epoch:3 Step:520 Training_loss:0.605110, Acc_avg:61.00% Training_loss_avg:0.627071
Epoch:3 Step:528 Training_loss:0.746689, Acc_avg:60.00% Training_loss_avg:0.632379
Epoch:3 Step:536 Training_loss:0.653027, Acc_avg:60.50% Training_loss_avg:0.632220
Epoch:3 Step:544 Training_loss:0.539039, Acc_avg:60.75% Training_loss_avg:0.631141
Epoch:3 Step:552 Training_loss:0.602697, Acc_avg:60.50% Training_loss_avg:0.632502
Epoch:3 Step:560 Training_loss:0.529961, Acc_avg:60.25% Training_loss_avg:0.632597
Epoch:3 Step:568 Training_loss:0.645114, Acc_avg:60.00% Training_loss_avg:0.631758
Epoch:3 Step:576 Training_loss:0.660900, Acc_avg:60.00% Training_loss_avg:0.632379
Epoch:3 Step:584 Training_loss:0.566412, Acc_avg:60.50% Training_loss_avg:0.631382
Epoch:3 Step:592 Training_loss:0

52it [00:07,  6.60it/s]


Epoch:3 Step:744 Val_loss:0.653007, Val_Acc_avg:61.75%
Epoch:3 Step:752 Training_loss:0.594764, Acc_avg:62.75% Training_loss_avg:0.615012
Epoch:3 Step:760 Training_loss:0.459680, Acc_avg:63.25% Training_loss_avg:0.612615
Epoch:3 Step:768 Training_loss:0.693201, Acc_avg:63.50% Training_loss_avg:0.614181
Epoch:3 Step:776 Training_loss:0.941623, Acc_avg:62.50% Training_loss_avg:0.622181
Epoch:3 Step:784 Training_loss:0.560873, Acc_avg:62.50% Training_loss_avg:0.620185
Epoch:3 Step:792 Training_loss:0.595451, Acc_avg:62.50% Training_loss_avg:0.619148
Epoch:3 Step:800 Training_loss:0.649790, Acc_avg:62.50% Training_loss_avg:0.619166
Epoch:3 Step:808 Training_loss:0.678242, Acc_avg:62.75% Training_loss_avg:0.617322
Epoch:3 Step:816 Training_loss:0.587338, Acc_avg:63.00% Training_loss_avg:0.616553
Epoch:3 Step:824 Training_loss:0.579090, Acc_avg:62.75% Training_loss_avg:0.620400
Epoch:3 Step:832 Training_loss:0.737326, Acc_avg:62.25% Training_loss_avg:0.625449
Epoch:3 Step:840 Training_loss:0

52it [00:07,  6.60it/s]


Epoch:3 Step:992 Val_loss:0.774610, Val_Acc_avg:60.50%
Epoch:3 Step:1000 Training_loss:0.793640, Acc_avg:65.50% Training_loss_avg:0.594248
Epoch:3 Step:1008 Training_loss:0.431527, Acc_avg:66.50% Training_loss_avg:0.586679
Epoch:3 Step:1016 Training_loss:0.679454, Acc_avg:66.75% Training_loss_avg:0.589849
Epoch:3 Step:1024 Training_loss:0.704381, Acc_avg:66.75% Training_loss_avg:0.594378
Epoch:3 Step:1032 Training_loss:0.451676, Acc_avg:66.50% Training_loss_avg:0.592728
Epoch:3 Step:1040 Training_loss:0.496367, Acc_avg:67.00% Training_loss_avg:0.588425
Epoch:3 Step:1048 Training_loss:0.683321, Acc_avg:66.25% Training_loss_avg:0.593357
Epoch:3 Step:1056 Training_loss:0.819107, Acc_avg:66.25% Training_loss_avg:0.596575
Epoch:3 Step:1064 Training_loss:0.441701, Acc_avg:67.00% Training_loss_avg:0.592561
Epoch:3 Step:1072 Training_loss:0.666087, Acc_avg:66.50% Training_loss_avg:0.598833
Epoch:3 Step:1080 Training_loss:0.604895, Acc_avg:66.50% Training_loss_avg:0.600401
Epoch:3 Step:1088 Tra

52it [00:07,  6.60it/s]


Epoch:3 Step:1240 Val_loss:0.638910, Val_Acc_avg:61.25%
Epoch:3 Step:1248 Training_loss:0.688336, Acc_avg:64.00% Training_loss_avg:0.611894
Epoch:3 Step:1256 Training_loss:0.597547, Acc_avg:63.75% Training_loss_avg:0.612813
Epoch:3 Step:1264 Training_loss:0.709476, Acc_avg:63.50% Training_loss_avg:0.613999
Epoch:3 Step:1272 Training_loss:0.651232, Acc_avg:63.25% Training_loss_avg:0.616602
Epoch:3 Step:1280 Training_loss:0.664540, Acc_avg:63.75% Training_loss_avg:0.615055
Epoch:3 Step:1288 Training_loss:0.577564, Acc_avg:64.00% Training_loss_avg:0.612697
Epoch:3 Step:1296 Training_loss:0.609509, Acc_avg:63.75% Training_loss_avg:0.619744
Epoch:3 Step:1304 Training_loss:0.789904, Acc_avg:63.25% Training_loss_avg:0.624624
Epoch:3 Step:1312 Training_loss:0.515745, Acc_avg:64.25% Training_loss_avg:0.621715
Epoch:3 Step:1320 Training_loss:0.720396, Acc_avg:63.25% Training_loss_avg:0.625459
Epoch:3 Step:1328 Training_loss:0.527581, Acc_avg:63.25% Training_loss_avg:0.623378
Epoch:3 Step:1336 Tr

52it [00:07,  6.60it/s]


Epoch:3 Step:1488 Val_loss:0.655328, Val_Acc_avg:61.75%
Epoch:3 Step:1496 Training_loss:0.518694, Acc_avg:65.25% Training_loss_avg:0.611946
Epoch:3 Step:1504 Training_loss:0.711838, Acc_avg:65.50% Training_loss_avg:0.608710
Epoch:3 Step:1512 Training_loss:0.646931, Acc_avg:65.75% Training_loss_avg:0.609778
Epoch:3 Step:1520 Training_loss:0.494675, Acc_avg:65.50% Training_loss_avg:0.611729
Epoch:3 Step:1528 Training_loss:0.539233, Acc_avg:65.75% Training_loss_avg:0.609398
Epoch:3 Step:1536 Training_loss:0.722500, Acc_avg:65.75% Training_loss_avg:0.609036
Epoch:3 Step:1544 Training_loss:0.630388, Acc_avg:65.50% Training_loss_avg:0.610743
Epoch:3 Step:1552 Training_loss:0.645720, Acc_avg:66.25% Training_loss_avg:0.607242
Epoch:3 Step:1560 Training_loss:0.498794, Acc_avg:66.75% Training_loss_avg:0.603644
Epoch:3 Step:1568 Training_loss:0.780276, Acc_avg:65.50% Training_loss_avg:0.609948
Epoch:3 Step:1576 Training_loss:0.647457, Acc_avg:65.50% Training_loss_avg:0.611212
Epoch:3 Step:1584 Tr

52it [00:07,  6.60it/s]


Epoch:3 Step:1736 Val_loss:0.669818, Val_Acc_avg:61.25%
Epoch:3 Step:1744 Training_loss:0.664527, Acc_avg:67.50% Training_loss_avg:0.607293
Epoch:3 Step:1752 Training_loss:0.591948, Acc_avg:66.75% Training_loss_avg:0.610559
Epoch:3 Step:1760 Training_loss:0.737316, Acc_avg:65.75% Training_loss_avg:0.616256
Epoch:3 Step:1768 Training_loss:0.432969, Acc_avg:66.00% Training_loss_avg:0.609022
Epoch:3 Step:1776 Training_loss:0.419432, Acc_avg:66.75% Training_loss_avg:0.605928
Epoch:3 Step:1784 Training_loss:0.843265, Acc_avg:66.50% Training_loss_avg:0.609849
Epoch:3 Step:1792 Training_loss:0.576881, Acc_avg:66.00% Training_loss_avg:0.610714
Epoch:3 Step:1800 Training_loss:0.454645, Acc_avg:66.75% Training_loss_avg:0.605855
Epoch:3 Step:1808 Training_loss:0.435428, Acc_avg:67.00% Training_loss_avg:0.603940
Epoch:3 Step:1816 Training_loss:0.599802, Acc_avg:67.25% Training_loss_avg:0.603655
Epoch:3 Step:1824 Training_loss:0.637333, Acc_avg:67.50% Training_loss_avg:0.603177
Epoch:3 Step:1832 Tr

52it [00:07,  6.60it/s]


Epoch:3 Step:1984 Val_loss:0.656925, Val_Acc_avg:61.25%
Epoch:3 Step:1992 Training_loss:0.433749, Acc_avg:70.50% Training_loss_avg:0.570594
Epoch:3 Step:2000 Training_loss:0.454454, Acc_avg:70.50% Training_loss_avg:0.565359
Epoch:3 Step:2008 Training_loss:0.553033, Acc_avg:70.25% Training_loss_avg:0.564682
Epoch:3 Step:2016 Training_loss:0.585203, Acc_avg:70.00% Training_loss_avg:0.568294
Epoch:3 Step:2024 Training_loss:0.531424, Acc_avg:69.75% Training_loss_avg:0.566534
Epoch:3 Step:2032 Training_loss:0.494091, Acc_avg:70.25% Training_loss_avg:0.565158
Epoch:3 Step:2040 Training_loss:0.401178, Acc_avg:70.75% Training_loss_avg:0.559310
Epoch:3 Step:2048 Training_loss:0.619961, Acc_avg:70.25% Training_loss_avg:0.560362
Epoch:3 Step:2056 Training_loss:0.569679, Acc_avg:70.00% Training_loss_avg:0.560156
Epoch:3 Step:2064 Training_loss:0.932228, Acc_avg:69.50% Training_loss_avg:0.567851
Epoch:3 Step:2072 Training_loss:0.561863, Acc_avg:69.50% Training_loss_avg:0.563862
Epoch:3 Step:2080 Tr

52it [00:07,  6.60it/s]


Epoch:3 Step:2232 Val_loss:0.712187, Val_Acc_avg:61.25%
Epoch:3 Step:2240 Training_loss:0.568901, Acc_avg:70.50% Training_loss_avg:0.544819
Epoch:3 Step:2248 Training_loss:0.610770, Acc_avg:70.50% Training_loss_avg:0.548335
Epoch:3 Step:2256 Training_loss:0.804690, Acc_avg:69.50% Training_loss_avg:0.558116
Epoch:3 Step:2264 Training_loss:0.654425, Acc_avg:69.25% Training_loss_avg:0.561167
Epoch:3 Step:2272 Training_loss:0.418774, Acc_avg:69.75% Training_loss_avg:0.558099
Epoch:3 Step:2280 Training_loss:0.791597, Acc_avg:69.50% Training_loss_avg:0.560507
Epoch:3 Step:2288 Training_loss:0.567859, Acc_avg:69.00% Training_loss_avg:0.562197
Epoch:3 Step:2296 Training_loss:0.778719, Acc_avg:68.25% Training_loss_avg:0.568958
Epoch:3 Step:2304 Training_loss:0.639798, Acc_avg:68.50% Training_loss_avg:0.569585
Epoch:3 Step:2312 Training_loss:0.864025, Acc_avg:68.25% Training_loss_avg:0.575300
Epoch:3 Step:2320 Training_loss:0.452549, Acc_avg:68.75% Training_loss_avg:0.568981
Epoch:3 Step:2328 Tr

52it [00:07,  6.60it/s]


Epoch:3 Step:2480 Val_loss:0.667308, Val_Acc_avg:56.00%
Epoch:3 Step:2488 Training_loss:0.840160, Acc_avg:62.25% Training_loss_avg:0.629203
Epoch:3 Step:2496 Training_loss:1.008829, Acc_avg:61.25% Training_loss_avg:0.638291
Epoch:3 Step:2504 Training_loss:0.415823, Acc_avg:62.00% Training_loss_avg:0.630534
Epoch:3 Step:2512 Training_loss:0.783912, Acc_avg:61.25% Training_loss_avg:0.635847
Epoch:3 Step:2520 Training_loss:0.743899, Acc_avg:61.50% Training_loss_avg:0.638401
Epoch:3 Step:2528 Training_loss:0.626535, Acc_avg:62.50% Training_loss_avg:0.635497
Epoch:3 Step:2536 Training_loss:0.411191, Acc_avg:62.75% Training_loss_avg:0.633197
Epoch:3 Step:2544 Training_loss:0.529201, Acc_avg:62.50% Training_loss_avg:0.638080
Epoch:3 Step:2552 Training_loss:0.692082, Acc_avg:61.75% Training_loss_avg:0.645988
Epoch:3 Step:2560 Training_loss:0.498630, Acc_avg:61.75% Training_loss_avg:0.642021
Epoch:3 Step:2568 Training_loss:0.468228, Acc_avg:61.75% Training_loss_avg:0.643414
Epoch:3 Step:2576 Tr

52it [00:07,  6.60it/s]


Epoch:3 Step:2728 Val_loss:0.663599, Val_Acc_avg:61.00%
Epoch:3 Step:2736 Training_loss:0.530925, Acc_avg:62.25% Training_loss_avg:0.638190
Epoch:3 Step:2744 Training_loss:0.603644, Acc_avg:62.25% Training_loss_avg:0.636006
Epoch:3 Step:2752 Training_loss:0.600045, Acc_avg:62.25% Training_loss_avg:0.634530
Epoch:3 Step:2760 Training_loss:0.787930, Acc_avg:62.50% Training_loss_avg:0.636457
Epoch:3 Step:2768 Training_loss:0.694956, Acc_avg:62.50% Training_loss_avg:0.634399
Epoch:3 Step:2776 Training_loss:0.501738, Acc_avg:62.50% Training_loss_avg:0.634738
Epoch:3 Step:2784 Training_loss:0.406185, Acc_avg:63.50% Training_loss_avg:0.629301
Epoch:3 Step:2792 Training_loss:0.415015, Acc_avg:64.00% Training_loss_avg:0.620895
Epoch:3 Step:2800 Training_loss:0.504826, Acc_avg:65.00% Training_loss_avg:0.613631
Epoch:3 Step:2808 Training_loss:0.485906, Acc_avg:65.50% Training_loss_avg:0.609945
Epoch:3 Step:2816 Training_loss:0.489991, Acc_avg:67.00% Training_loss_avg:0.602424
Epoch:3 Step:2824 Tr

52it [00:07,  6.60it/s]


Epoch:3 Step:2976 Val_loss:0.641371, Val_Acc_avg:62.25%
Epoch:3 Step:2984 Training_loss:0.648481, Acc_avg:68.00% Training_loss_avg:0.587379
Epoch:3 Step:2992 Training_loss:0.417965, Acc_avg:68.50% Training_loss_avg:0.582825
Epoch:3 Step:3000 Training_loss:0.503849, Acc_avg:68.75% Training_loss_avg:0.582420
Epoch:3 Step:3008 Training_loss:0.454833, Acc_avg:68.75% Training_loss_avg:0.581899
Epoch:3 Step:3016 Training_loss:0.587632, Acc_avg:69.00% Training_loss_avg:0.582033
Epoch:3 Step:3024 Training_loss:0.701759, Acc_avg:68.50% Training_loss_avg:0.584493
Epoch:3 Step:3032 Training_loss:0.651637, Acc_avg:68.00% Training_loss_avg:0.586789
Epoch:3 Step:3040 Training_loss:0.409523, Acc_avg:68.50% Training_loss_avg:0.579120
Epoch:3 Step:3048 Training_loss:0.463793, Acc_avg:68.25% Training_loss_avg:0.581476
Epoch:3 Step:3056 Training_loss:0.714762, Acc_avg:67.25% Training_loss_avg:0.585422
Epoch:3 Step:3064 Training_loss:0.700388, Acc_avg:66.75% Training_loss_avg:0.586890
Epoch:3 Step:3072 Tr

52it [00:07,  6.60it/s]


Epoch:3 Step:3224 Val_loss:0.723872, Val_Acc_avg:61.25%
Epoch:3 Step:3232 Training_loss:0.815789, Acc_avg:67.50% Training_loss_avg:0.587939
Epoch:3 Step:3240 Training_loss:0.308293, Acc_avg:68.25% Training_loss_avg:0.583017
Epoch:3 Step:3248 Training_loss:0.544706, Acc_avg:68.00% Training_loss_avg:0.581377
Epoch:3 Step:3256 Training_loss:0.623402, Acc_avg:68.25% Training_loss_avg:0.580168
Epoch:3 Step:3264 Training_loss:0.531041, Acc_avg:69.00% Training_loss_avg:0.575030
Epoch:3 Step:3272 Training_loss:0.613493, Acc_avg:69.25% Training_loss_avg:0.573437
Epoch:3 Step:3280 Training_loss:0.819670, Acc_avg:69.25% Training_loss_avg:0.577377
Epoch:3 Step:3288 Training_loss:0.407348, Acc_avg:69.50% Training_loss_avg:0.574040
Epoch:3 Step:3296 Training_loss:0.431887, Acc_avg:69.50% Training_loss_avg:0.572429
Epoch:3 Step:3304 Training_loss:0.479568, Acc_avg:70.00% Training_loss_avg:0.568922
Epoch:3 Step:3312 Training_loss:0.652125, Acc_avg:69.75% Training_loss_avg:0.573258
Epoch:3 Step:3320 Tr

52it [00:07,  6.60it/s]


Epoch:4 Step:0 Val_loss:0.684421, Val_Acc_avg:61.25%
Epoch:4 Step:8 Training_loss:0.609345, Acc_avg:70.08% Training_loss_avg:0.570051
Epoch:4 Step:16 Training_loss:0.779144, Acc_avg:69.83% Training_loss_avg:0.572447
Epoch:4 Step:24 Training_loss:0.581066, Acc_avg:69.33% Training_loss_avg:0.574088
Epoch:4 Step:32 Training_loss:0.560507, Acc_avg:69.58% Training_loss_avg:0.571516
Epoch:4 Step:40 Training_loss:0.432327, Acc_avg:69.83% Training_loss_avg:0.566239
Epoch:4 Step:48 Training_loss:0.608516, Acc_avg:69.83% Training_loss_avg:0.565439
Epoch:4 Step:56 Training_loss:0.535315, Acc_avg:69.58% Training_loss_avg:0.567786
Epoch:4 Step:64 Training_loss:0.680695, Acc_avg:69.08% Training_loss_avg:0.571323
Epoch:4 Step:72 Training_loss:0.519951, Acc_avg:68.83% Training_loss_avg:0.572625
Epoch:4 Step:80 Training_loss:0.378877, Acc_avg:69.33% Training_loss_avg:0.568450
Epoch:4 Step:88 Training_loss:0.458794, Acc_avg:70.08% Training_loss_avg:0.563591
Epoch:4 Step:96 Training_loss:0.796681, Acc_av

52it [00:07,  6.60it/s]


Epoch:4 Step:248 Val_loss:0.790684, Val_Acc_avg:61.25%
Epoch:4 Step:256 Training_loss:0.571661, Acc_avg:73.33% Training_loss_avg:0.552943
Epoch:4 Step:264 Training_loss:0.582506, Acc_avg:73.33% Training_loss_avg:0.555757
Epoch:4 Step:272 Training_loss:0.841757, Acc_avg:73.58% Training_loss_avg:0.560869
Epoch:4 Step:280 Training_loss:0.630205, Acc_avg:73.83% Training_loss_avg:0.562701
Epoch:4 Step:288 Training_loss:0.602657, Acc_avg:73.83% Training_loss_avg:0.564291
Epoch:4 Step:296 Training_loss:0.853158, Acc_avg:74.08% Training_loss_avg:0.565038
Epoch:4 Step:304 Training_loss:0.677859, Acc_avg:73.58% Training_loss_avg:0.572429
Epoch:4 Step:312 Training_loss:0.653125, Acc_avg:73.58% Training_loss_avg:0.574598
Epoch:4 Step:320 Training_loss:0.383802, Acc_avg:73.83% Training_loss_avg:0.569806
Epoch:4 Step:328 Training_loss:0.620953, Acc_avg:73.58% Training_loss_avg:0.571604
Epoch:4 Step:336 Training_loss:0.382180, Acc_avg:74.33% Training_loss_avg:0.566978
Epoch:4 Step:344 Training_loss:0

52it [00:07,  6.61it/s]


Epoch:4 Step:496 Val_loss:0.665487, Val_Acc_avg:59.00%
Epoch:4 Step:504 Training_loss:0.575223, Acc_avg:74.25% Training_loss_avg:0.562043
Epoch:4 Step:512 Training_loss:0.613762, Acc_avg:73.75% Training_loss_avg:0.562928
Epoch:4 Step:520 Training_loss:0.341094, Acc_avg:73.75% Training_loss_avg:0.559818
Epoch:4 Step:528 Training_loss:0.566468, Acc_avg:73.75% Training_loss_avg:0.561542
Epoch:4 Step:536 Training_loss:0.549748, Acc_avg:73.75% Training_loss_avg:0.562479
Epoch:4 Step:544 Training_loss:0.634454, Acc_avg:73.25% Training_loss_avg:0.567082
Epoch:4 Step:552 Training_loss:0.575540, Acc_avg:72.75% Training_loss_avg:0.572481
Epoch:4 Step:560 Training_loss:0.530140, Acc_avg:72.50% Training_loss_avg:0.573919
Epoch:4 Step:568 Training_loss:0.566199, Acc_avg:72.50% Training_loss_avg:0.573938
Epoch:4 Step:576 Training_loss:0.675681, Acc_avg:71.75% Training_loss_avg:0.577691
Epoch:4 Step:584 Training_loss:0.414357, Acc_avg:71.75% Training_loss_avg:0.576116
Epoch:4 Step:592 Training_loss:0

52it [00:07,  6.60it/s]


Epoch:4 Step:744 Val_loss:0.679009, Val_Acc_avg:58.75%
Epoch:4 Step:752 Training_loss:0.689759, Acc_avg:70.50% Training_loss_avg:0.556150
Epoch:4 Step:760 Training_loss:0.460369, Acc_avg:71.25% Training_loss_avg:0.556164
Epoch:4 Step:768 Training_loss:0.547529, Acc_avg:71.25% Training_loss_avg:0.555038
Epoch:4 Step:776 Training_loss:1.003537, Acc_avg:71.00% Training_loss_avg:0.563327
Epoch:4 Step:784 Training_loss:0.767784, Acc_avg:70.25% Training_loss_avg:0.569566
Epoch:4 Step:792 Training_loss:0.564862, Acc_avg:69.50% Training_loss_avg:0.573893
Epoch:4 Step:800 Training_loss:0.396746, Acc_avg:69.75% Training_loss_avg:0.570780
Epoch:4 Step:808 Training_loss:0.493739, Acc_avg:70.50% Training_loss_avg:0.567879
Epoch:4 Step:816 Training_loss:0.531677, Acc_avg:70.50% Training_loss_avg:0.571553
Epoch:4 Step:824 Training_loss:0.637128, Acc_avg:70.50% Training_loss_avg:0.573471
Epoch:4 Step:832 Training_loss:0.610752, Acc_avg:71.00% Training_loss_avg:0.571571
Epoch:4 Step:840 Training_loss:0

52it [00:07,  6.59it/s]


Epoch:4 Step:992 Val_loss:0.672671, Val_Acc_avg:59.00%
Epoch:4 Step:1000 Training_loss:0.513759, Acc_avg:69.50% Training_loss_avg:0.571986
Epoch:4 Step:1008 Training_loss:0.504564, Acc_avg:69.50% Training_loss_avg:0.575316
Epoch:4 Step:1016 Training_loss:0.645179, Acc_avg:69.75% Training_loss_avg:0.577352
Epoch:4 Step:1024 Training_loss:0.525605, Acc_avg:70.25% Training_loss_avg:0.575515
Epoch:4 Step:1032 Training_loss:0.784142, Acc_avg:70.00% Training_loss_avg:0.580707
Epoch:4 Step:1040 Training_loss:0.507552, Acc_avg:70.00% Training_loss_avg:0.580406
Epoch:4 Step:1048 Training_loss:0.312061, Acc_avg:70.25% Training_loss_avg:0.576940
Epoch:4 Step:1056 Training_loss:0.500048, Acc_avg:70.00% Training_loss_avg:0.572594
Epoch:4 Step:1064 Training_loss:0.540480, Acc_avg:70.00% Training_loss_avg:0.571892
Epoch:4 Step:1072 Training_loss:0.803765, Acc_avg:69.75% Training_loss_avg:0.575100
Epoch:4 Step:1080 Training_loss:0.574326, Acc_avg:69.50% Training_loss_avg:0.578369
Epoch:4 Step:1088 Tra

52it [00:07,  6.57it/s]


Epoch:4 Step:1240 Val_loss:0.678949, Val_Acc_avg:61.00%
Epoch:4 Step:1248 Training_loss:0.861173, Acc_avg:67.75% Training_loss_avg:0.579204
Epoch:4 Step:1256 Training_loss:0.523205, Acc_avg:67.50% Training_loss_avg:0.580674
Epoch:4 Step:1264 Training_loss:0.613989, Acc_avg:67.50% Training_loss_avg:0.578221
Epoch:4 Step:1272 Training_loss:0.513237, Acc_avg:67.50% Training_loss_avg:0.576028
Epoch:4 Step:1280 Training_loss:0.789278, Acc_avg:66.75% Training_loss_avg:0.579349
Epoch:4 Step:1288 Training_loss:0.685895, Acc_avg:67.00% Training_loss_avg:0.581392
Epoch:4 Step:1296 Training_loss:0.923600, Acc_avg:66.25% Training_loss_avg:0.589867
Epoch:4 Step:1304 Training_loss:0.546195, Acc_avg:66.75% Training_loss_avg:0.587965
Epoch:4 Step:1312 Training_loss:0.344395, Acc_avg:67.00% Training_loss_avg:0.584810
Epoch:4 Step:1320 Training_loss:0.378596, Acc_avg:67.75% Training_loss_avg:0.581285
Epoch:4 Step:1328 Training_loss:0.490294, Acc_avg:67.75% Training_loss_avg:0.578684
Epoch:4 Step:1336 Tr

52it [00:07,  6.60it/s]


Epoch:4 Step:1488 Val_loss:0.646510, Val_Acc_avg:60.25%
Epoch:4 Step:1496 Training_loss:0.550882, Acc_avg:65.50% Training_loss_avg:0.594469
Epoch:4 Step:1504 Training_loss:0.511419, Acc_avg:66.75% Training_loss_avg:0.587654
Epoch:4 Step:1512 Training_loss:0.537866, Acc_avg:66.50% Training_loss_avg:0.587427
Epoch:4 Step:1520 Training_loss:0.612327, Acc_avg:66.00% Training_loss_avg:0.591146
Epoch:4 Step:1528 Training_loss:0.372632, Acc_avg:66.25% Training_loss_avg:0.588116
Epoch:4 Step:1536 Training_loss:0.609013, Acc_avg:65.75% Training_loss_avg:0.590050
Epoch:4 Step:1544 Training_loss:0.539448, Acc_avg:65.50% Training_loss_avg:0.592740
Epoch:4 Step:1552 Training_loss:0.631122, Acc_avg:64.75% Training_loss_avg:0.594939
Epoch:4 Step:1560 Training_loss:0.498656, Acc_avg:65.25% Training_loss_avg:0.594726
Epoch:4 Step:1568 Training_loss:0.590849, Acc_avg:65.25% Training_loss_avg:0.597281
Epoch:4 Step:1576 Training_loss:0.608070, Acc_avg:65.25% Training_loss_avg:0.599900
Epoch:4 Step:1584 Tr

52it [00:07,  6.59it/s]


Epoch:4 Step:1736 Val_loss:0.762729, Val_Acc_avg:60.50%
Epoch:4 Step:1744 Training_loss:0.462452, Acc_avg:68.50% Training_loss_avg:0.578203
Epoch:4 Step:1752 Training_loss:0.284714, Acc_avg:69.25% Training_loss_avg:0.571802
Epoch:4 Step:1760 Training_loss:0.769803, Acc_avg:69.25% Training_loss_avg:0.576842
Epoch:4 Step:1768 Training_loss:0.809312, Acc_avg:68.25% Training_loss_avg:0.584156
Epoch:4 Step:1776 Training_loss:0.574281, Acc_avg:69.00% Training_loss_avg:0.579391
Epoch:4 Step:1784 Training_loss:0.468932, Acc_avg:69.25% Training_loss_avg:0.578098
Epoch:4 Step:1792 Training_loss:0.327263, Acc_avg:70.75% Training_loss_avg:0.570212
Epoch:4 Step:1800 Training_loss:0.223324, Acc_avg:72.25% Training_loss_avg:0.560247
Epoch:4 Step:1808 Training_loss:0.606873, Acc_avg:72.50% Training_loss_avg:0.557957
Epoch:4 Step:1816 Training_loss:0.764428, Acc_avg:72.00% Training_loss_avg:0.564425
Epoch:4 Step:1824 Training_loss:0.322272, Acc_avg:72.25% Training_loss_avg:0.559300
Epoch:4 Step:1832 Tr

52it [00:07,  6.60it/s]


Epoch:4 Step:1984 Val_loss:0.656382, Val_Acc_avg:59.50%
Epoch:4 Step:1992 Training_loss:0.570090, Acc_avg:70.00% Training_loss_avg:0.561632
Epoch:4 Step:2000 Training_loss:0.618051, Acc_avg:70.25% Training_loss_avg:0.557726
Epoch:4 Step:2008 Training_loss:0.494761, Acc_avg:70.25% Training_loss_avg:0.557153
Epoch:4 Step:2016 Training_loss:0.639872, Acc_avg:70.25% Training_loss_avg:0.559689
Epoch:4 Step:2024 Training_loss:0.478655, Acc_avg:71.50% Training_loss_avg:0.553617
Epoch:4 Step:2032 Training_loss:0.880246, Acc_avg:71.50% Training_loss_avg:0.553733
Epoch:4 Step:2040 Training_loss:0.557663, Acc_avg:71.25% Training_loss_avg:0.549845
Epoch:4 Step:2048 Training_loss:0.632489, Acc_avg:70.75% Training_loss_avg:0.551949
Epoch:4 Step:2056 Training_loss:0.336113, Acc_avg:71.50% Training_loss_avg:0.545204
Epoch:4 Step:2064 Training_loss:0.482532, Acc_avg:71.75% Training_loss_avg:0.545989
Epoch:4 Step:2072 Training_loss:0.487521, Acc_avg:71.75% Training_loss_avg:0.546601
Epoch:4 Step:2080 Tr

52it [00:07,  6.60it/s]


Epoch:4 Step:2232 Val_loss:0.675878, Val_Acc_avg:60.00%
Epoch:4 Step:2240 Training_loss:0.596178, Acc_avg:69.00% Training_loss_avg:0.555810
Epoch:4 Step:2248 Training_loss:0.598761, Acc_avg:68.75% Training_loss_avg:0.555949
Epoch:4 Step:2256 Training_loss:0.574389, Acc_avg:68.50% Training_loss_avg:0.559985
Epoch:4 Step:2264 Training_loss:0.744761, Acc_avg:67.00% Training_loss_avg:0.571504
Epoch:4 Step:2272 Training_loss:0.710473, Acc_avg:67.00% Training_loss_avg:0.568523
Epoch:4 Step:2280 Training_loss:0.402906, Acc_avg:67.75% Training_loss_avg:0.561330
Epoch:4 Step:2288 Training_loss:0.724565, Acc_avg:66.75% Training_loss_avg:0.563195
Epoch:4 Step:2296 Training_loss:0.501804, Acc_avg:66.75% Training_loss_avg:0.564348
Epoch:4 Step:2304 Training_loss:0.348924, Acc_avg:67.75% Training_loss_avg:0.557801
Epoch:4 Step:2312 Training_loss:0.749912, Acc_avg:67.25% Training_loss_avg:0.562717
Epoch:4 Step:2320 Training_loss:0.402280, Acc_avg:67.50% Training_loss_avg:0.558522
Epoch:4 Step:2328 Tr

52it [00:07,  6.59it/s]


Epoch:4 Step:2480 Val_loss:0.649705, Val_Acc_avg:61.50%
Epoch:4 Step:2488 Training_loss:0.418148, Acc_avg:67.00% Training_loss_avg:0.575175
Epoch:4 Step:2496 Training_loss:0.575109, Acc_avg:67.00% Training_loss_avg:0.574331
Epoch:4 Step:2504 Training_loss:0.627776, Acc_avg:66.25% Training_loss_avg:0.581699
Epoch:4 Step:2512 Training_loss:0.562599, Acc_avg:66.00% Training_loss_avg:0.584870
Epoch:4 Step:2520 Training_loss:0.576953, Acc_avg:66.25% Training_loss_avg:0.584825
Epoch:4 Step:2528 Training_loss:0.513832, Acc_avg:66.25% Training_loss_avg:0.583773
Epoch:4 Step:2536 Training_loss:0.476720, Acc_avg:66.50% Training_loss_avg:0.583340
Epoch:4 Step:2544 Training_loss:0.374891, Acc_avg:67.00% Training_loss_avg:0.581293
Epoch:4 Step:2552 Training_loss:0.611504, Acc_avg:66.75% Training_loss_avg:0.582635
Epoch:4 Step:2560 Training_loss:0.441608, Acc_avg:67.25% Training_loss_avg:0.579265
Epoch:4 Step:2568 Training_loss:0.722141, Acc_avg:66.75% Training_loss_avg:0.579704
Epoch:4 Step:2576 Tr

52it [00:07,  6.60it/s]


Epoch:4 Step:2728 Val_loss:0.649943, Val_Acc_avg:58.75%
Epoch:4 Step:2736 Training_loss:0.263467, Acc_avg:68.00% Training_loss_avg:0.586400
Epoch:4 Step:2744 Training_loss:0.700553, Acc_avg:67.75% Training_loss_avg:0.587623
Epoch:4 Step:2752 Training_loss:0.393567, Acc_avg:68.75% Training_loss_avg:0.576000
Epoch:4 Step:2760 Training_loss:0.514447, Acc_avg:68.75% Training_loss_avg:0.572561
Epoch:4 Step:2768 Training_loss:0.885084, Acc_avg:68.50% Training_loss_avg:0.571755
Epoch:4 Step:2776 Training_loss:0.334351, Acc_avg:69.25% Training_loss_avg:0.562738
Epoch:4 Step:2784 Training_loss:0.365829, Acc_avg:70.25% Training_loss_avg:0.556063
Epoch:4 Step:2792 Training_loss:0.517371, Acc_avg:69.50% Training_loss_avg:0.558753
Epoch:4 Step:2800 Training_loss:0.607387, Acc_avg:69.75% Training_loss_avg:0.557618
Epoch:4 Step:2808 Training_loss:0.594127, Acc_avg:69.75% Training_loss_avg:0.559957
Epoch:4 Step:2816 Training_loss:0.508931, Acc_avg:70.00% Training_loss_avg:0.556462
Epoch:4 Step:2824 Tr

52it [00:07,  6.59it/s]


Epoch:4 Step:2976 Val_loss:0.661207, Val_Acc_avg:63.25%
Epoch:4 Step:2984 Training_loss:0.743741, Acc_avg:68.25% Training_loss_avg:0.574354
Epoch:4 Step:2992 Training_loss:0.401419, Acc_avg:68.50% Training_loss_avg:0.573041
Epoch:4 Step:3000 Training_loss:0.638137, Acc_avg:69.00% Training_loss_avg:0.574354
Epoch:4 Step:3008 Training_loss:0.469935, Acc_avg:69.25% Training_loss_avg:0.574492
Epoch:4 Step:3016 Training_loss:0.629365, Acc_avg:69.00% Training_loss_avg:0.572833
Epoch:4 Step:3024 Training_loss:0.473024, Acc_avg:69.00% Training_loss_avg:0.574031
Epoch:4 Step:3032 Training_loss:0.455103, Acc_avg:69.25% Training_loss_avg:0.568032
Epoch:4 Step:3040 Training_loss:0.512412, Acc_avg:69.75% Training_loss_avg:0.563583
Epoch:4 Step:3048 Training_loss:0.599136, Acc_avg:70.25% Training_loss_avg:0.561735
Epoch:4 Step:3056 Training_loss:0.573720, Acc_avg:70.75% Training_loss_avg:0.559576
Epoch:4 Step:3064 Training_loss:0.361666, Acc_avg:71.25% Training_loss_avg:0.554976
Epoch:4 Step:3072 Tr

52it [00:07,  6.60it/s]


Epoch:4 Step:3224 Val_loss:0.680723, Val_Acc_avg:62.50%
Epoch:4 Step:3232 Training_loss:0.470278, Acc_avg:72.75% Training_loss_avg:0.533979
Epoch:4 Step:3240 Training_loss:0.667614, Acc_avg:72.00% Training_loss_avg:0.538905
Epoch:4 Step:3248 Training_loss:0.584908, Acc_avg:72.25% Training_loss_avg:0.540639
Epoch:4 Step:3256 Training_loss:0.419002, Acc_avg:72.25% Training_loss_avg:0.541701
Epoch:4 Step:3264 Training_loss:0.601236, Acc_avg:72.00% Training_loss_avg:0.543475
Epoch:4 Step:3272 Training_loss:0.678155, Acc_avg:72.00% Training_loss_avg:0.546434
Epoch:4 Step:3280 Training_loss:0.534536, Acc_avg:71.75% Training_loss_avg:0.544336
Epoch:4 Step:3288 Training_loss:0.441876, Acc_avg:71.75% Training_loss_avg:0.542117
Epoch:4 Step:3296 Training_loss:0.364218, Acc_avg:72.75% Training_loss_avg:0.536356
Epoch:4 Step:3304 Training_loss:0.443095, Acc_avg:73.25% Training_loss_avg:0.531622
Epoch:4 Step:3312 Training_loss:0.235256, Acc_avg:73.50% Training_loss_avg:0.529201
Epoch:4 Step:3320 Tr

In [177]:
def main():
    """
    Main configuration function for a given finetune run
    :return: None
    """
    run_name = "lr_5e-6 redo, 512 split with attention! redo 6"
    run_dir = "codebert_finetune_runs/{}".format(run_name)
    model_name = 'codebert-base'
    checkpoint_location = None
    online = False
    load_splits = False
    save_data = True

    if load_splits:
      train_data, val_data, test_data = split_loader(run_dir)
      X_train, A_train, Y_train = train_data
      X_val, A_val, Y_val = val_data
      X_test, A_test, Y_test = test_data
      print("loaded data splits")

    else:
      print("generating data splits")

      code_df = preprocess_data(file_loc='code_dataset.jsonl')
      train_data, val_data, test_data = tokenize(code_df, model_name=model_name)

      X_train, A_train, Y_train = train_data
      X_val, A_val, Y_val = val_data
      X_test, A_test, Y_test = test_data

      data_type = ['train', 'val', 'test']
      data_split_type = ['X', 'A', 'Y']

      print(X_train.shape)
    # Creating dir to save logs and checkpoints, re
    dir_name = "{}".format(run_dir)
    if os.path.exists(dir_name):
        input("run name already exists, press Enter to overwrite")
    else:
        os.makedirs(dir_name)


    if save_data:
      print("saving data splits")

      data_all = [train_data, val_data, test_data]
      for i, data in enumerate(data_all):
        for j, split in enumerate(data):
          with open('{}/{}_{}.pickle'.format(run_dir,data_type[i], data_split_type[j]), 'wb') as handle:
            pickle.dump(split, handle)


    print(train_data[0].shape)
    print("Data points: {}".format(len(train_data)))

    # Loading model from checkpoint if location provided
    if online:
        print("loading model from online")
        model = AutoModelForSequenceClassification.from_pretrained("microsoft/codebert-base")
    elif checkpoint_location is None:
        print("loading model from local repo")

        model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)
    else:
        print("loading model from checkpoint: {}".format(checkpoint_location))
        model = AutoModelForSequenceClassification.from_pretrained(checkpoint_location)

    train(model=model,
          train_data=train_data,
          val_data=val_data,
          epochs=5,
          batch_size=8,
          learning_rate=3e-6,
          validate_per=250,
          run_name=run_name,
          run_descrption="lr_3e-6 redo, 256 split, saving splits")


In [178]:
torch.cuda.empty_cache()
main()

generating data splits
Insecure code counts: 3729, Total code counts: 8000, Proportion 0.466125
Data points: 8000




torch.Size([3331, 512])
saving data splits
torch.Size([3331, 512])
Data points: 3
loading model from local repo


Some weights of the model checkpoint at codebert-base were not used when initializing RobertaForSequenceClassification: ['pooler.dense.weight', 'pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at codebert-base and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.weight', 'classifier.out_proj.weight', 'classifier.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for p

Epoch:0 Step:0 Training_loss:0.667126, Acc_avg:62.50% Training_loss_avg:0.667126
Validating:


52it [00:07,  6.59it/s]


Epoch:0 Step:0 Val_loss:0.690983, Val_Acc_avg:54.25%
Epoch:0 Step:8 Training_loss:0.709088, Acc_avg:56.25% Training_loss_avg:0.688107
Epoch:0 Step:16 Training_loss:0.698459, Acc_avg:50.00% Training_loss_avg:0.691558
Epoch:0 Step:24 Training_loss:0.712653, Acc_avg:50.00% Training_loss_avg:0.696832
Epoch:0 Step:32 Training_loss:0.704778, Acc_avg:50.00% Training_loss_avg:0.698421
Epoch:0 Step:40 Training_loss:0.653642, Acc_avg:52.08% Training_loss_avg:0.690958
Epoch:0 Step:48 Training_loss:0.751974, Acc_avg:46.43% Training_loss_avg:0.699674
Epoch:0 Step:56 Training_loss:0.725819, Acc_avg:45.31% Training_loss_avg:0.702942
Epoch:0 Step:64 Training_loss:0.710670, Acc_avg:44.44% Training_loss_avg:0.703801
Epoch:0 Step:72 Training_loss:0.696412, Acc_avg:43.75% Training_loss_avg:0.703062
Epoch:0 Step:80 Training_loss:0.657638, Acc_avg:46.59% Training_loss_avg:0.698933
Epoch:0 Step:88 Training_loss:0.711821, Acc_avg:46.88% Training_loss_avg:0.700007
Epoch:0 Step:96 Training_loss:0.676815, Acc_av

52it [00:07,  6.57it/s]


Epoch:0 Step:248 Val_loss:0.690797, Val_Acc_avg:54.25%
Epoch:0 Step:256 Training_loss:0.639177, Acc_avg:48.86% Training_loss_avg:0.696481
Epoch:0 Step:264 Training_loss:0.659795, Acc_avg:49.26% Training_loss_avg:0.695402
Epoch:0 Step:272 Training_loss:0.683633, Acc_avg:49.64% Training_loss_avg:0.695066
Epoch:0 Step:280 Training_loss:0.683068, Acc_avg:49.31% Training_loss_avg:0.694733
Epoch:0 Step:288 Training_loss:0.679893, Acc_avg:49.32% Training_loss_avg:0.694332
Epoch:0 Step:296 Training_loss:0.683900, Acc_avg:49.34% Training_loss_avg:0.694057
Epoch:0 Step:304 Training_loss:0.712757, Acc_avg:49.68% Training_loss_avg:0.694537
Epoch:0 Step:312 Training_loss:0.654668, Acc_avg:50.00% Training_loss_avg:0.693540
Epoch:0 Step:320 Training_loss:0.713921, Acc_avg:49.70% Training_loss_avg:0.694037
Epoch:0 Step:328 Training_loss:0.645714, Acc_avg:50.60% Training_loss_avg:0.692886
Epoch:0 Step:336 Training_loss:0.744353, Acc_avg:50.00% Training_loss_avg:0.694083
Epoch:0 Step:344 Training_loss:0

52it [00:07,  6.60it/s]


Epoch:0 Step:496 Val_loss:0.692195, Val_Acc_avg:54.25%
Epoch:0 Step:504 Training_loss:0.779638, Acc_avg:51.25% Training_loss_avg:0.694046
Epoch:0 Step:512 Training_loss:0.771046, Acc_avg:51.00% Training_loss_avg:0.695262
Epoch:0 Step:520 Training_loss:0.715992, Acc_avg:51.25% Training_loss_avg:0.695262
Epoch:0 Step:528 Training_loss:0.717990, Acc_avg:51.00% Training_loss_avg:0.696008
Epoch:0 Step:536 Training_loss:0.695787, Acc_avg:50.75% Training_loss_avg:0.696856
Epoch:0 Step:544 Training_loss:0.734413, Acc_avg:51.25% Training_loss_avg:0.696011
Epoch:0 Step:552 Training_loss:0.723202, Acc_avg:50.75% Training_loss_avg:0.697461
Epoch:0 Step:560 Training_loss:0.690388, Acc_avg:51.00% Training_loss_avg:0.696907
Epoch:0 Step:568 Training_loss:0.652499, Acc_avg:51.75% Training_loss_avg:0.696058
Epoch:0 Step:576 Training_loss:0.705798, Acc_avg:52.50% Training_loss_avg:0.696699
Epoch:0 Step:584 Training_loss:0.717432, Acc_avg:52.75% Training_loss_avg:0.696697
Epoch:0 Step:592 Training_loss:0

52it [00:07,  6.58it/s]


Epoch:0 Step:744 Val_loss:0.690270, Val_Acc_avg:54.25%
Epoch:0 Step:752 Training_loss:0.687889, Acc_avg:54.75% Training_loss_avg:0.695491
Epoch:0 Step:760 Training_loss:0.675476, Acc_avg:55.25% Training_loss_avg:0.695190
Epoch:0 Step:768 Training_loss:0.657793, Acc_avg:55.50% Training_loss_avg:0.694858
Epoch:0 Step:776 Training_loss:0.780609, Acc_avg:54.75% Training_loss_avg:0.697144
Epoch:0 Step:784 Training_loss:0.739204, Acc_avg:54.50% Training_loss_avg:0.697313
Epoch:0 Step:792 Training_loss:0.694461, Acc_avg:54.50% Training_loss_avg:0.697434
Epoch:0 Step:800 Training_loss:0.670850, Acc_avg:55.00% Training_loss_avg:0.696749
Epoch:0 Step:808 Training_loss:0.685063, Acc_avg:55.00% Training_loss_avg:0.697277
Epoch:0 Step:816 Training_loss:0.684592, Acc_avg:54.50% Training_loss_avg:0.697706
Epoch:0 Step:824 Training_loss:0.655563, Acc_avg:55.75% Training_loss_avg:0.696049
Epoch:0 Step:832 Training_loss:0.684246, Acc_avg:55.25% Training_loss_avg:0.696925
Epoch:0 Step:840 Training_loss:0

52it [00:07,  6.59it/s]


Epoch:0 Step:992 Val_loss:0.690323, Val_Acc_avg:54.25%
Epoch:0 Step:1000 Training_loss:0.704868, Acc_avg:54.25% Training_loss_avg:0.691205
Epoch:0 Step:1008 Training_loss:0.760311, Acc_avg:53.75% Training_loss_avg:0.692949
Epoch:0 Step:1016 Training_loss:0.663617, Acc_avg:54.00% Training_loss_avg:0.692022
Epoch:0 Step:1024 Training_loss:0.626859, Acc_avg:55.00% Training_loss_avg:0.689958
Epoch:0 Step:1032 Training_loss:0.701577, Acc_avg:54.75% Training_loss_avg:0.690682
Epoch:0 Step:1040 Training_loss:0.684964, Acc_avg:54.50% Training_loss_avg:0.691293
Epoch:0 Step:1048 Training_loss:0.785079, Acc_avg:53.50% Training_loss_avg:0.693707
Epoch:0 Step:1056 Training_loss:0.796933, Acc_avg:53.00% Training_loss_avg:0.695165
Epoch:0 Step:1064 Training_loss:0.638338, Acc_avg:53.75% Training_loss_avg:0.693453
Epoch:0 Step:1072 Training_loss:0.676239, Acc_avg:53.50% Training_loss_avg:0.692786
Epoch:0 Step:1080 Training_loss:0.735108, Acc_avg:52.75% Training_loss_avg:0.694273
Epoch:0 Step:1088 Tra

52it [00:07,  6.59it/s]


Epoch:0 Step:1240 Val_loss:0.689286, Val_Acc_avg:55.00%
Epoch:0 Step:1248 Training_loss:0.687899, Acc_avg:49.50% Training_loss_avg:0.697718
Epoch:0 Step:1256 Training_loss:0.753435, Acc_avg:49.25% Training_loss_avg:0.698359
Epoch:0 Step:1264 Training_loss:0.668964, Acc_avg:49.00% Training_loss_avg:0.698534
Epoch:0 Step:1272 Training_loss:0.666846, Acc_avg:49.50% Training_loss_avg:0.697053
Epoch:0 Step:1280 Training_loss:0.640887, Acc_avg:49.75% Training_loss_avg:0.697154
Epoch:0 Step:1288 Training_loss:0.672051, Acc_avg:49.75% Training_loss_avg:0.697402
Epoch:0 Step:1296 Training_loss:0.680093, Acc_avg:49.75% Training_loss_avg:0.696643
Epoch:0 Step:1304 Training_loss:0.671291, Acc_avg:49.75% Training_loss_avg:0.696473
Epoch:0 Step:1312 Training_loss:0.690046, Acc_avg:49.00% Training_loss_avg:0.696867
Epoch:0 Step:1320 Training_loss:0.671963, Acc_avg:49.25% Training_loss_avg:0.695464
Epoch:0 Step:1328 Training_loss:0.692886, Acc_avg:49.25% Training_loss_avg:0.695554
Epoch:0 Step:1336 Tr

52it [00:07,  6.60it/s]


Epoch:0 Step:1488 Val_loss:0.692307, Val_Acc_avg:50.00%
Epoch:0 Step:1496 Training_loss:0.662567, Acc_avg:50.75% Training_loss_avg:0.695264
Epoch:0 Step:1504 Training_loss:0.636968, Acc_avg:51.50% Training_loss_avg:0.694136
Epoch:0 Step:1512 Training_loss:0.659263, Acc_avg:52.00% Training_loss_avg:0.693650
Epoch:0 Step:1520 Training_loss:0.719982, Acc_avg:52.00% Training_loss_avg:0.693762
Epoch:0 Step:1528 Training_loss:0.660740, Acc_avg:52.25% Training_loss_avg:0.691705
Epoch:0 Step:1536 Training_loss:0.765842, Acc_avg:51.25% Training_loss_avg:0.693992
Epoch:0 Step:1544 Training_loss:0.736984, Acc_avg:51.25% Training_loss_avg:0.694450
Epoch:0 Step:1552 Training_loss:0.713422, Acc_avg:50.75% Training_loss_avg:0.695086
Epoch:0 Step:1560 Training_loss:0.748109, Acc_avg:50.75% Training_loss_avg:0.695939
Epoch:0 Step:1568 Training_loss:0.738721, Acc_avg:50.75% Training_loss_avg:0.697033
Epoch:0 Step:1576 Training_loss:0.714442, Acc_avg:51.00% Training_loss_avg:0.696991
Epoch:0 Step:1584 Tr

52it [00:07,  6.60it/s]


Epoch:0 Step:1736 Val_loss:0.693396, Val_Acc_avg:54.25%
Epoch:0 Step:1744 Training_loss:0.732753, Acc_avg:52.25% Training_loss_avg:0.700027
Epoch:0 Step:1752 Training_loss:0.668196, Acc_avg:52.75% Training_loss_avg:0.699251
Epoch:0 Step:1760 Training_loss:0.681245, Acc_avg:53.00% Training_loss_avg:0.698731
Epoch:0 Step:1768 Training_loss:0.724025, Acc_avg:53.00% Training_loss_avg:0.699070
Epoch:0 Step:1776 Training_loss:0.656047, Acc_avg:53.00% Training_loss_avg:0.698791
Epoch:0 Step:1784 Training_loss:0.618944, Acc_avg:53.25% Training_loss_avg:0.696896
Epoch:0 Step:1792 Training_loss:0.664094, Acc_avg:53.75% Training_loss_avg:0.695304
Epoch:0 Step:1800 Training_loss:0.656349, Acc_avg:53.75% Training_loss_avg:0.693760
Epoch:0 Step:1808 Training_loss:0.687555, Acc_avg:54.00% Training_loss_avg:0.693417
Epoch:0 Step:1816 Training_loss:0.580544, Acc_avg:54.00% Training_loss_avg:0.691586
Epoch:0 Step:1824 Training_loss:0.732315, Acc_avg:53.50% Training_loss_avg:0.692661
Epoch:0 Step:1832 Tr

52it [00:07,  6.60it/s]


Epoch:0 Step:1984 Val_loss:0.698229, Val_Acc_avg:54.25%
Epoch:0 Step:1992 Training_loss:0.717185, Acc_avg:57.75% Training_loss_avg:0.678878
Epoch:0 Step:2000 Training_loss:0.699202, Acc_avg:57.75% Training_loss_avg:0.679401
Epoch:0 Step:2008 Training_loss:0.647020, Acc_avg:57.75% Training_loss_avg:0.678929
Epoch:0 Step:2016 Training_loss:0.666064, Acc_avg:57.75% Training_loss_avg:0.678469
Epoch:0 Step:2024 Training_loss:0.636378, Acc_avg:58.00% Training_loss_avg:0.676621
Epoch:0 Step:2032 Training_loss:0.699803, Acc_avg:58.75% Training_loss_avg:0.675584
Epoch:0 Step:2040 Training_loss:0.683920, Acc_avg:58.50% Training_loss_avg:0.675993
Epoch:0 Step:2048 Training_loss:0.673218, Acc_avg:58.00% Training_loss_avg:0.676170
Epoch:0 Step:2056 Training_loss:0.633516, Acc_avg:58.75% Training_loss_avg:0.674119
Epoch:0 Step:2064 Training_loss:0.621562, Acc_avg:59.00% Training_loss_avg:0.672464
Epoch:0 Step:2072 Training_loss:0.647833, Acc_avg:59.25% Training_loss_avg:0.672025
Epoch:0 Step:2080 Tr

52it [00:07,  6.60it/s]


Epoch:0 Step:2232 Val_loss:0.692016, Val_Acc_avg:54.25%
Epoch:0 Step:2240 Training_loss:0.664030, Acc_avg:60.75% Training_loss_avg:0.671634
Epoch:0 Step:2248 Training_loss:0.653604, Acc_avg:60.50% Training_loss_avg:0.672288
Epoch:0 Step:2256 Training_loss:0.786957, Acc_avg:59.75% Training_loss_avg:0.675843
Epoch:0 Step:2264 Training_loss:0.573664, Acc_avg:60.25% Training_loss_avg:0.674041
Epoch:0 Step:2272 Training_loss:0.746483, Acc_avg:59.75% Training_loss_avg:0.676478
Epoch:0 Step:2280 Training_loss:0.574257, Acc_avg:59.75% Training_loss_avg:0.675443
Epoch:0 Step:2288 Training_loss:0.655523, Acc_avg:59.75% Training_loss_avg:0.675099
Epoch:0 Step:2296 Training_loss:0.685692, Acc_avg:60.00% Training_loss_avg:0.674759
Epoch:0 Step:2304 Training_loss:0.610059, Acc_avg:60.25% Training_loss_avg:0.673960
Epoch:0 Step:2312 Training_loss:0.663568, Acc_avg:60.25% Training_loss_avg:0.673804
Epoch:0 Step:2320 Training_loss:0.809603, Acc_avg:59.50% Training_loss_avg:0.675939
Epoch:0 Step:2328 Tr

52it [00:07,  6.59it/s]


Epoch:0 Step:2480 Val_loss:0.686342, Val_Acc_avg:54.25%
Epoch:0 Step:2488 Training_loss:0.769699, Acc_avg:58.75% Training_loss_avg:0.680886
Epoch:0 Step:2496 Training_loss:0.570042, Acc_avg:58.75% Training_loss_avg:0.680019
Epoch:0 Step:2504 Training_loss:0.683044, Acc_avg:59.25% Training_loss_avg:0.678265
Epoch:0 Step:2512 Training_loss:0.774515, Acc_avg:58.75% Training_loss_avg:0.679570
Epoch:0 Step:2520 Training_loss:0.779217, Acc_avg:57.75% Training_loss_avg:0.681881
Epoch:0 Step:2528 Training_loss:0.657010, Acc_avg:57.50% Training_loss_avg:0.682824
Epoch:0 Step:2536 Training_loss:0.690758, Acc_avg:58.00% Training_loss_avg:0.681973
Epoch:0 Step:2544 Training_loss:0.664261, Acc_avg:58.25% Training_loss_avg:0.679767
Epoch:0 Step:2552 Training_loss:0.689363, Acc_avg:57.50% Training_loss_avg:0.681710
Epoch:0 Step:2560 Training_loss:0.617043, Acc_avg:57.75% Training_loss_avg:0.681374
Epoch:0 Step:2568 Training_loss:0.772235, Acc_avg:56.50% Training_loss_avg:0.685122
Epoch:0 Step:2576 Tr

52it [00:07,  6.58it/s]


Epoch:0 Step:2728 Val_loss:0.681944, Val_Acc_avg:54.25%
Epoch:0 Step:2736 Training_loss:0.629830, Acc_avg:57.25% Training_loss_avg:0.679332
Epoch:0 Step:2744 Training_loss:0.721622, Acc_avg:56.50% Training_loss_avg:0.682465
Epoch:0 Step:2752 Training_loss:0.634984, Acc_avg:56.75% Training_loss_avg:0.681423
Epoch:0 Step:2760 Training_loss:0.695734, Acc_avg:56.50% Training_loss_avg:0.680219
Epoch:0 Step:2768 Training_loss:0.551426, Acc_avg:57.75% Training_loss_avg:0.676325
Epoch:0 Step:2776 Training_loss:0.655806, Acc_avg:57.75% Training_loss_avg:0.676769
Epoch:0 Step:2784 Training_loss:0.741671, Acc_avg:57.50% Training_loss_avg:0.679575
Epoch:0 Step:2792 Training_loss:0.661364, Acc_avg:57.75% Training_loss_avg:0.679534
Epoch:0 Step:2800 Training_loss:0.591551, Acc_avg:58.50% Training_loss_avg:0.677430
Epoch:0 Step:2808 Training_loss:0.651149, Acc_avg:59.00% Training_loss_avg:0.675076
Epoch:0 Step:2816 Training_loss:0.702690, Acc_avg:58.75% Training_loss_avg:0.676255
Epoch:0 Step:2824 Tr

52it [00:07,  6.59it/s]


Epoch:0 Step:2976 Val_loss:0.676247, Val_Acc_avg:55.25%
Epoch:0 Step:2984 Training_loss:0.604634, Acc_avg:59.25% Training_loss_avg:0.674113
Epoch:0 Step:2992 Training_loss:0.732145, Acc_avg:59.00% Training_loss_avg:0.674196
Epoch:0 Step:3000 Training_loss:0.649335, Acc_avg:59.00% Training_loss_avg:0.674649
Epoch:0 Step:3008 Training_loss:0.676626, Acc_avg:58.75% Training_loss_avg:0.675242
Epoch:0 Step:3016 Training_loss:0.648138, Acc_avg:58.75% Training_loss_avg:0.674167
Epoch:0 Step:3024 Training_loss:0.672988, Acc_avg:59.00% Training_loss_avg:0.673958
Epoch:0 Step:3032 Training_loss:0.657001, Acc_avg:58.25% Training_loss_avg:0.675372
Epoch:0 Step:3040 Training_loss:0.672864, Acc_avg:57.75% Training_loss_avg:0.675565
Epoch:0 Step:3048 Training_loss:0.677571, Acc_avg:57.50% Training_loss_avg:0.677541
Epoch:0 Step:3056 Training_loss:0.667523, Acc_avg:57.50% Training_loss_avg:0.678647
Epoch:0 Step:3064 Training_loss:0.653846, Acc_avg:57.50% Training_loss_avg:0.678086
Epoch:0 Step:3072 Tr

52it [00:07,  6.57it/s]


Epoch:0 Step:3224 Val_loss:0.682713, Val_Acc_avg:54.25%
Epoch:0 Step:3232 Training_loss:0.762462, Acc_avg:57.00% Training_loss_avg:0.681393
Epoch:0 Step:3240 Training_loss:0.783243, Acc_avg:56.75% Training_loss_avg:0.683167
Epoch:0 Step:3248 Training_loss:0.658195, Acc_avg:56.50% Training_loss_avg:0.684441
Epoch:0 Step:3256 Training_loss:0.675787, Acc_avg:57.00% Training_loss_avg:0.683716
Epoch:0 Step:3264 Training_loss:0.760467, Acc_avg:56.75% Training_loss_avg:0.685520
Epoch:0 Step:3272 Training_loss:0.892978, Acc_avg:55.50% Training_loss_avg:0.690096
Epoch:0 Step:3280 Training_loss:0.660688, Acc_avg:56.00% Training_loss_avg:0.689179
Epoch:0 Step:3288 Training_loss:0.629079, Acc_avg:56.75% Training_loss_avg:0.686159
Epoch:0 Step:3296 Training_loss:0.712955, Acc_avg:56.75% Training_loss_avg:0.684381
Epoch:0 Step:3304 Training_loss:0.734077, Acc_avg:56.25% Training_loss_avg:0.685011
Epoch:0 Step:3312 Training_loss:0.654644, Acc_avg:56.25% Training_loss_avg:0.684787
Epoch:0 Step:3320 Tr

52it [00:07,  6.58it/s]


Epoch:1 Step:0 Val_loss:0.670451, Val_Acc_avg:60.00%
Epoch:1 Step:8 Training_loss:0.762153, Acc_avg:56.00% Training_loss_avg:0.679751
Epoch:1 Step:16 Training_loss:0.649430, Acc_avg:55.75% Training_loss_avg:0.680379
Epoch:1 Step:24 Training_loss:0.700613, Acc_avg:55.50% Training_loss_avg:0.680551
Epoch:1 Step:32 Training_loss:0.694600, Acc_avg:55.50% Training_loss_avg:0.680589
Epoch:1 Step:40 Training_loss:0.712103, Acc_avg:54.75% Training_loss_avg:0.683310
Epoch:1 Step:48 Training_loss:0.676327, Acc_avg:54.75% Training_loss_avg:0.684744
Epoch:1 Step:56 Training_loss:0.733625, Acc_avg:55.00% Training_loss_avg:0.684773
Epoch:1 Step:64 Training_loss:0.681229, Acc_avg:54.75% Training_loss_avg:0.685411
Epoch:1 Step:72 Training_loss:0.659606, Acc_avg:55.25% Training_loss_avg:0.685071
Epoch:1 Step:80 Training_loss:0.719031, Acc_avg:55.00% Training_loss_avg:0.686489
Epoch:1 Step:88 Training_loss:0.711947, Acc_avg:55.00% Training_loss_avg:0.687268
Epoch:1 Step:96 Training_loss:0.693179, Acc_av

52it [00:07,  6.58it/s]


Epoch:1 Step:248 Val_loss:0.671532, Val_Acc_avg:56.75%
Epoch:1 Step:256 Training_loss:0.641922, Acc_avg:51.75% Training_loss_avg:0.694731
Epoch:1 Step:264 Training_loss:0.751185, Acc_avg:51.50% Training_loss_avg:0.695741
Epoch:1 Step:272 Training_loss:0.631033, Acc_avg:51.50% Training_loss_avg:0.696697
Epoch:1 Step:280 Training_loss:0.671111, Acc_avg:51.50% Training_loss_avg:0.695548
Epoch:1 Step:288 Training_loss:0.669972, Acc_avg:51.50% Training_loss_avg:0.696114
Epoch:1 Step:296 Training_loss:0.605534, Acc_avg:52.50% Training_loss_avg:0.692975
Epoch:1 Step:304 Training_loss:0.619874, Acc_avg:52.75% Training_loss_avg:0.689708
Epoch:1 Step:312 Training_loss:0.713186, Acc_avg:52.75% Training_loss_avg:0.690807
Epoch:1 Step:320 Training_loss:0.655328, Acc_avg:52.75% Training_loss_avg:0.690398
Epoch:1 Step:328 Training_loss:0.656796, Acc_avg:53.25% Training_loss_avg:0.688325
Epoch:1 Step:336 Training_loss:0.633863, Acc_avg:54.25% Training_loss_avg:0.683143
Epoch:1 Step:344 Training_loss:0

52it [00:07,  6.59it/s]


Epoch:1 Step:496 Val_loss:0.665729, Val_Acc_avg:60.00%
Epoch:1 Step:504 Training_loss:0.626830, Acc_avg:55.00% Training_loss_avg:0.679300
Epoch:1 Step:512 Training_loss:0.746700, Acc_avg:54.75% Training_loss_avg:0.681006
Epoch:1 Step:520 Training_loss:0.709882, Acc_avg:54.75% Training_loss_avg:0.681597
Epoch:1 Step:528 Training_loss:0.733729, Acc_avg:54.00% Training_loss_avg:0.683918
Epoch:1 Step:536 Training_loss:0.701561, Acc_avg:53.75% Training_loss_avg:0.684051
Epoch:1 Step:544 Training_loss:0.683487, Acc_avg:53.25% Training_loss_avg:0.684791
Epoch:1 Step:552 Training_loss:0.682271, Acc_avg:53.25% Training_loss_avg:0.684405
Epoch:1 Step:560 Training_loss:0.635309, Acc_avg:53.25% Training_loss_avg:0.684522
Epoch:1 Step:568 Training_loss:0.638315, Acc_avg:53.50% Training_loss_avg:0.684355
Epoch:1 Step:576 Training_loss:0.703765, Acc_avg:53.50% Training_loss_avg:0.683800
Epoch:1 Step:584 Training_loss:0.665943, Acc_avg:53.00% Training_loss_avg:0.683492
Epoch:1 Step:592 Training_loss:0

52it [00:07,  6.59it/s]


Epoch:1 Step:744 Val_loss:0.665088, Val_Acc_avg:59.50%
Epoch:1 Step:752 Training_loss:0.643999, Acc_avg:54.75% Training_loss_avg:0.684837
Epoch:1 Step:760 Training_loss:0.688733, Acc_avg:54.75% Training_loss_avg:0.684860
Epoch:1 Step:768 Training_loss:0.579722, Acc_avg:55.25% Training_loss_avg:0.682855
Epoch:1 Step:776 Training_loss:0.729722, Acc_avg:54.75% Training_loss_avg:0.684419
Epoch:1 Step:784 Training_loss:0.615396, Acc_avg:54.75% Training_loss_avg:0.683294
Epoch:1 Step:792 Training_loss:0.674422, Acc_avg:54.50% Training_loss_avg:0.683756
Epoch:1 Step:800 Training_loss:0.667595, Acc_avg:54.25% Training_loss_avg:0.683587
Epoch:1 Step:808 Training_loss:0.597201, Acc_avg:54.75% Training_loss_avg:0.683299
Epoch:1 Step:816 Training_loss:0.583679, Acc_avg:55.00% Training_loss_avg:0.682301
Epoch:1 Step:824 Training_loss:0.592321, Acc_avg:55.00% Training_loss_avg:0.680279
Epoch:1 Step:832 Training_loss:0.569303, Acc_avg:55.75% Training_loss_avg:0.676913
Epoch:1 Step:840 Training_loss:0

52it [00:07,  6.59it/s]


Epoch:1 Step:992 Val_loss:0.663789, Val_Acc_avg:58.75%
Epoch:1 Step:1000 Training_loss:0.683121, Acc_avg:56.75% Training_loss_avg:0.676751
Epoch:1 Step:1008 Training_loss:0.642429, Acc_avg:57.75% Training_loss_avg:0.673677
Epoch:1 Step:1016 Training_loss:0.757429, Acc_avg:57.00% Training_loss_avg:0.674285
Epoch:1 Step:1024 Training_loss:0.610748, Acc_avg:56.50% Training_loss_avg:0.673485
Epoch:1 Step:1032 Training_loss:0.650289, Acc_avg:57.25% Training_loss_avg:0.670439
Epoch:1 Step:1040 Training_loss:0.763386, Acc_avg:57.25% Training_loss_avg:0.670827
Epoch:1 Step:1048 Training_loss:0.640066, Acc_avg:56.75% Training_loss_avg:0.670670
Epoch:1 Step:1056 Training_loss:0.814511, Acc_avg:56.25% Training_loss_avg:0.674014
Epoch:1 Step:1064 Training_loss:0.729933, Acc_avg:56.00% Training_loss_avg:0.675014
Epoch:1 Step:1072 Training_loss:0.738527, Acc_avg:55.50% Training_loss_avg:0.677189
Epoch:1 Step:1080 Training_loss:0.645331, Acc_avg:56.25% Training_loss_avg:0.675828
Epoch:1 Step:1088 Tra

52it [00:07,  6.58it/s]


Epoch:1 Step:1240 Val_loss:0.662679, Val_Acc_avg:60.50%
Epoch:1 Step:1248 Training_loss:0.789310, Acc_avg:51.75% Training_loss_avg:0.695393
Epoch:1 Step:1256 Training_loss:0.642661, Acc_avg:52.25% Training_loss_avg:0.692460
Epoch:1 Step:1264 Training_loss:0.575332, Acc_avg:52.75% Training_loss_avg:0.690633
Epoch:1 Step:1272 Training_loss:0.698910, Acc_avg:52.00% Training_loss_avg:0.692086
Epoch:1 Step:1280 Training_loss:0.743283, Acc_avg:52.50% Training_loss_avg:0.692096
Epoch:1 Step:1288 Training_loss:0.701550, Acc_avg:52.25% Training_loss_avg:0.694520
Epoch:1 Step:1296 Training_loss:0.674797, Acc_avg:52.00% Training_loss_avg:0.695057
Epoch:1 Step:1304 Training_loss:0.737936, Acc_avg:52.00% Training_loss_avg:0.693465
Epoch:1 Step:1312 Training_loss:0.603086, Acc_avg:52.75% Training_loss_avg:0.690863
Epoch:1 Step:1320 Training_loss:0.729253, Acc_avg:53.00% Training_loss_avg:0.691955
Epoch:1 Step:1328 Training_loss:0.574128, Acc_avg:53.50% Training_loss_avg:0.691801
Epoch:1 Step:1336 Tr

52it [00:07,  6.58it/s]


Epoch:1 Step:1488 Val_loss:0.671508, Val_Acc_avg:56.00%
Epoch:1 Step:1496 Training_loss:0.634760, Acc_avg:56.50% Training_loss_avg:0.678728
Epoch:1 Step:1504 Training_loss:0.603847, Acc_avg:56.75% Training_loss_avg:0.676828
Epoch:1 Step:1512 Training_loss:0.666131, Acc_avg:56.75% Training_loss_avg:0.676117
Epoch:1 Step:1520 Training_loss:0.777647, Acc_avg:56.00% Training_loss_avg:0.677320
Epoch:1 Step:1528 Training_loss:0.660640, Acc_avg:56.50% Training_loss_avg:0.676059
Epoch:1 Step:1536 Training_loss:0.662796, Acc_avg:56.75% Training_loss_avg:0.675925
Epoch:1 Step:1544 Training_loss:0.696739, Acc_avg:56.50% Training_loss_avg:0.675936
Epoch:1 Step:1552 Training_loss:0.653267, Acc_avg:56.50% Training_loss_avg:0.675778
Epoch:1 Step:1560 Training_loss:0.666522, Acc_avg:56.75% Training_loss_avg:0.675547
Epoch:1 Step:1568 Training_loss:0.744836, Acc_avg:56.00% Training_loss_avg:0.676883
Epoch:1 Step:1576 Training_loss:0.624579, Acc_avg:56.50% Training_loss_avg:0.674863
Epoch:1 Step:1584 Tr

52it [00:07,  6.59it/s]


Epoch:1 Step:1736 Val_loss:0.662667, Val_Acc_avg:58.25%
Epoch:1 Step:1744 Training_loss:0.656394, Acc_avg:56.75% Training_loss_avg:0.673338
Epoch:1 Step:1752 Training_loss:0.650483, Acc_avg:56.50% Training_loss_avg:0.675212
Epoch:1 Step:1760 Training_loss:0.660601, Acc_avg:56.50% Training_loss_avg:0.675740
Epoch:1 Step:1768 Training_loss:0.604528, Acc_avg:56.00% Training_loss_avg:0.675268
Epoch:1 Step:1776 Training_loss:0.604975, Acc_avg:56.50% Training_loss_avg:0.671481
Epoch:1 Step:1784 Training_loss:0.625749, Acc_avg:56.75% Training_loss_avg:0.669875
Epoch:1 Step:1792 Training_loss:0.815753, Acc_avg:56.25% Training_loss_avg:0.671133
Epoch:1 Step:1800 Training_loss:0.647881, Acc_avg:57.00% Training_loss_avg:0.669300
Epoch:1 Step:1808 Training_loss:0.857058, Acc_avg:56.50% Training_loss_avg:0.674323
Epoch:1 Step:1816 Training_loss:0.625049, Acc_avg:55.75% Training_loss_avg:0.675326
Epoch:1 Step:1824 Training_loss:0.663134, Acc_avg:56.25% Training_loss_avg:0.673397
Epoch:1 Step:1832 Tr

52it [00:07,  6.60it/s]


Epoch:1 Step:1984 Val_loss:0.658206, Val_Acc_avg:60.25%
Epoch:1 Step:1992 Training_loss:0.674586, Acc_avg:56.50% Training_loss_avg:0.681913
Epoch:1 Step:2000 Training_loss:0.711373, Acc_avg:56.50% Training_loss_avg:0.682151
Epoch:1 Step:2008 Training_loss:0.688493, Acc_avg:56.50% Training_loss_avg:0.682738
Epoch:1 Step:2016 Training_loss:0.641360, Acc_avg:56.75% Training_loss_avg:0.681965
Epoch:1 Step:2024 Training_loss:0.658816, Acc_avg:56.50% Training_loss_avg:0.683002
Epoch:1 Step:2032 Training_loss:0.696646, Acc_avg:56.25% Training_loss_avg:0.684836
Epoch:1 Step:2040 Training_loss:0.696180, Acc_avg:56.25% Training_loss_avg:0.683822
Epoch:1 Step:2048 Training_loss:0.604972, Acc_avg:56.50% Training_loss_avg:0.683016
Epoch:1 Step:2056 Training_loss:0.637192, Acc_avg:56.00% Training_loss_avg:0.683545
Epoch:1 Step:2064 Training_loss:0.568646, Acc_avg:56.75% Training_loss_avg:0.681343
Epoch:1 Step:2072 Training_loss:0.637292, Acc_avg:57.50% Training_loss_avg:0.678808
Epoch:1 Step:2080 Tr

52it [00:07,  6.60it/s]


Epoch:1 Step:2232 Val_loss:0.667165, Val_Acc_avg:56.50%
Epoch:1 Step:2240 Training_loss:0.648566, Acc_avg:58.50% Training_loss_avg:0.681675
Epoch:1 Step:2248 Training_loss:0.589514, Acc_avg:58.50% Training_loss_avg:0.680332
Epoch:1 Step:2256 Training_loss:0.666388, Acc_avg:58.50% Training_loss_avg:0.679291
Epoch:1 Step:2264 Training_loss:0.765136, Acc_avg:57.50% Training_loss_avg:0.683562
Epoch:1 Step:2272 Training_loss:0.846652, Acc_avg:56.00% Training_loss_avg:0.688594
Epoch:1 Step:2280 Training_loss:0.856990, Acc_avg:55.00% Training_loss_avg:0.691180
Epoch:1 Step:2288 Training_loss:0.769909, Acc_avg:54.75% Training_loss_avg:0.692744
Epoch:1 Step:2296 Training_loss:0.643400, Acc_avg:55.25% Training_loss_avg:0.691217
Epoch:1 Step:2304 Training_loss:0.703010, Acc_avg:55.25% Training_loss_avg:0.692744
Epoch:1 Step:2312 Training_loss:0.640972, Acc_avg:55.75% Training_loss_avg:0.691072
Epoch:1 Step:2320 Training_loss:0.711671, Acc_avg:55.50% Training_loss_avg:0.688657
Epoch:1 Step:2328 Tr

52it [00:07,  6.60it/s]


Epoch:1 Step:2480 Val_loss:0.666651, Val_Acc_avg:57.00%
Epoch:1 Step:2488 Training_loss:0.638289, Acc_avg:57.75% Training_loss_avg:0.676897
Epoch:1 Step:2496 Training_loss:0.712766, Acc_avg:57.50% Training_loss_avg:0.678681
Epoch:1 Step:2504 Training_loss:0.640046, Acc_avg:57.75% Training_loss_avg:0.675543
Epoch:1 Step:2512 Training_loss:0.559559, Acc_avg:59.00% Training_loss_avg:0.670763
Epoch:1 Step:2520 Training_loss:0.680953, Acc_avg:58.50% Training_loss_avg:0.674024
Epoch:1 Step:2528 Training_loss:0.787992, Acc_avg:58.25% Training_loss_avg:0.674136
Epoch:1 Step:2536 Training_loss:0.752569, Acc_avg:58.00% Training_loss_avg:0.677236
Epoch:1 Step:2544 Training_loss:0.508780, Acc_avg:58.50% Training_loss_avg:0.673375
Epoch:1 Step:2552 Training_loss:0.651756, Acc_avg:58.50% Training_loss_avg:0.672279
Epoch:1 Step:2560 Training_loss:0.731714, Acc_avg:58.25% Training_loss_avg:0.674343
Epoch:1 Step:2568 Training_loss:0.641110, Acc_avg:58.25% Training_loss_avg:0.674395
Epoch:1 Step:2576 Tr

52it [00:07,  6.60it/s]


Epoch:1 Step:2728 Val_loss:0.664282, Val_Acc_avg:57.25%
Epoch:1 Step:2736 Training_loss:0.684702, Acc_avg:62.50% Training_loss_avg:0.655297
Epoch:1 Step:2744 Training_loss:0.791054, Acc_avg:61.50% Training_loss_avg:0.658945
Epoch:1 Step:2752 Training_loss:0.620422, Acc_avg:61.25% Training_loss_avg:0.658915
Epoch:1 Step:2760 Training_loss:0.810071, Acc_avg:60.50% Training_loss_avg:0.663568
Epoch:1 Step:2768 Training_loss:0.660965, Acc_avg:60.50% Training_loss_avg:0.662840
Epoch:1 Step:2776 Training_loss:0.628292, Acc_avg:61.00% Training_loss_avg:0.661607
Epoch:1 Step:2784 Training_loss:0.570386, Acc_avg:60.25% Training_loss_avg:0.660393
Epoch:1 Step:2792 Training_loss:0.668372, Acc_avg:60.00% Training_loss_avg:0.661472
Epoch:1 Step:2800 Training_loss:0.773089, Acc_avg:59.75% Training_loss_avg:0.664737
Epoch:1 Step:2808 Training_loss:0.786810, Acc_avg:59.00% Training_loss_avg:0.669107
Epoch:1 Step:2816 Training_loss:0.624284, Acc_avg:59.00% Training_loss_avg:0.667559
Epoch:1 Step:2824 Tr

52it [00:07,  6.60it/s]


Epoch:1 Step:2976 Val_loss:0.643000, Val_Acc_avg:62.00%
Epoch:1 Step:2984 Training_loss:0.689378, Acc_avg:56.50% Training_loss_avg:0.670682
Epoch:1 Step:2992 Training_loss:0.551636, Acc_avg:56.75% Training_loss_avg:0.668138
Epoch:1 Step:3000 Training_loss:0.812574, Acc_avg:56.00% Training_loss_avg:0.670913
Epoch:1 Step:3008 Training_loss:0.628397, Acc_avg:56.25% Training_loss_avg:0.669707
Epoch:1 Step:3016 Training_loss:0.691250, Acc_avg:56.50% Training_loss_avg:0.668418
Epoch:1 Step:3024 Training_loss:0.611835, Acc_avg:57.25% Training_loss_avg:0.667437
Epoch:1 Step:3032 Training_loss:0.777401, Acc_avg:56.75% Training_loss_avg:0.670882
Epoch:1 Step:3040 Training_loss:0.607970, Acc_avg:57.75% Training_loss_avg:0.667400
Epoch:1 Step:3048 Training_loss:0.710282, Acc_avg:57.50% Training_loss_avg:0.668353
Epoch:1 Step:3056 Training_loss:0.602846, Acc_avg:57.50% Training_loss_avg:0.667836
Epoch:1 Step:3064 Training_loss:0.540529, Acc_avg:58.25% Training_loss_avg:0.665536
Epoch:1 Step:3072 Tr

52it [00:07,  6.60it/s]


Epoch:1 Step:3224 Val_loss:0.646502, Val_Acc_avg:60.25%
Epoch:1 Step:3232 Training_loss:0.697606, Acc_avg:60.25% Training_loss_avg:0.662469
Epoch:1 Step:3240 Training_loss:0.507647, Acc_avg:61.25% Training_loss_avg:0.659373
Epoch:1 Step:3248 Training_loss:0.646221, Acc_avg:61.75% Training_loss_avg:0.658067
Epoch:1 Step:3256 Training_loss:0.657979, Acc_avg:61.50% Training_loss_avg:0.658189
Epoch:1 Step:3264 Training_loss:0.691806, Acc_avg:60.75% Training_loss_avg:0.659452
Epoch:1 Step:3272 Training_loss:0.793728, Acc_avg:60.25% Training_loss_avg:0.663574
Epoch:1 Step:3280 Training_loss:0.532688, Acc_avg:60.25% Training_loss_avg:0.661654
Epoch:1 Step:3288 Training_loss:0.559265, Acc_avg:61.75% Training_loss_avg:0.659795
Epoch:1 Step:3296 Training_loss:0.631938, Acc_avg:61.75% Training_loss_avg:0.658441
Epoch:1 Step:3304 Training_loss:0.598752, Acc_avg:62.50% Training_loss_avg:0.654840
Epoch:1 Step:3312 Training_loss:0.576377, Acc_avg:62.75% Training_loss_avg:0.651750
Epoch:1 Step:3320 Tr

52it [00:07,  6.59it/s]


Epoch:2 Step:0 Val_loss:0.684525, Val_Acc_avg:57.75%
Epoch:2 Step:8 Training_loss:0.557176, Acc_avg:64.33% Training_loss_avg:0.647808
Epoch:2 Step:16 Training_loss:0.501033, Acc_avg:64.33% Training_loss_avg:0.644151
Epoch:2 Step:24 Training_loss:0.627333, Acc_avg:63.83% Training_loss_avg:0.644759
Epoch:2 Step:32 Training_loss:0.450753, Acc_avg:64.58% Training_loss_avg:0.639790
Epoch:2 Step:40 Training_loss:0.620451, Acc_avg:65.08% Training_loss_avg:0.638714
Epoch:2 Step:48 Training_loss:0.932839, Acc_avg:64.58% Training_loss_avg:0.643583
Epoch:2 Step:56 Training_loss:0.989215, Acc_avg:63.58% Training_loss_avg:0.652335
Epoch:2 Step:64 Training_loss:0.787712, Acc_avg:63.83% Training_loss_avg:0.651838
Epoch:2 Step:72 Training_loss:0.587082, Acc_avg:63.83% Training_loss_avg:0.651011
Epoch:2 Step:80 Training_loss:0.570913, Acc_avg:64.33% Training_loss_avg:0.648604
Epoch:2 Step:88 Training_loss:0.667686, Acc_avg:63.83% Training_loss_avg:0.649721
Epoch:2 Step:96 Training_loss:0.459479, Acc_av

52it [00:07,  6.60it/s]


Epoch:2 Step:248 Val_loss:0.646246, Val_Acc_avg:62.00%
Epoch:2 Step:256 Training_loss:0.661080, Acc_avg:61.58% Training_loss_avg:0.650365
Epoch:2 Step:264 Training_loss:0.667061, Acc_avg:61.08% Training_loss_avg:0.651249
Epoch:2 Step:272 Training_loss:0.697899, Acc_avg:60.33% Training_loss_avg:0.654109
Epoch:2 Step:280 Training_loss:0.641478, Acc_avg:59.83% Training_loss_avg:0.656454
Epoch:2 Step:288 Training_loss:0.580229, Acc_avg:59.83% Training_loss_avg:0.654990
Epoch:2 Step:296 Training_loss:0.643922, Acc_avg:60.33% Training_loss_avg:0.653917
Epoch:2 Step:304 Training_loss:0.679420, Acc_avg:59.58% Training_loss_avg:0.657352
Epoch:2 Step:312 Training_loss:0.636688, Acc_avg:60.08% Training_loss_avg:0.657161
Epoch:2 Step:320 Training_loss:0.627294, Acc_avg:60.33% Training_loss_avg:0.656548
Epoch:2 Step:328 Training_loss:0.597732, Acc_avg:61.08% Training_loss_avg:0.654666
Epoch:2 Step:336 Training_loss:0.709730, Acc_avg:60.83% Training_loss_avg:0.652986
Epoch:2 Step:344 Training_loss:0

52it [00:07,  6.60it/s]


Epoch:2 Step:496 Val_loss:0.655601, Val_Acc_avg:59.25%
Epoch:2 Step:504 Training_loss:0.604433, Acc_avg:58.25% Training_loss_avg:0.664605
Epoch:2 Step:512 Training_loss:0.546470, Acc_avg:58.50% Training_loss_avg:0.664060
Epoch:2 Step:520 Training_loss:0.680046, Acc_avg:58.50% Training_loss_avg:0.664810
Epoch:2 Step:528 Training_loss:0.589282, Acc_avg:59.25% Training_loss_avg:0.659661
Epoch:2 Step:536 Training_loss:0.722108, Acc_avg:60.00% Training_loss_avg:0.655711
Epoch:2 Step:544 Training_loss:0.506855, Acc_avg:60.00% Training_loss_avg:0.655179
Epoch:2 Step:552 Training_loss:0.574162, Acc_avg:60.50% Training_loss_avg:0.649961
Epoch:2 Step:560 Training_loss:0.643464, Acc_avg:60.75% Training_loss_avg:0.649790
Epoch:2 Step:568 Training_loss:0.642967, Acc_avg:60.50% Training_loss_avg:0.649439
Epoch:2 Step:576 Training_loss:0.731196, Acc_avg:61.00% Training_loss_avg:0.648525
Epoch:2 Step:584 Training_loss:0.414280, Acc_avg:62.25% Training_loss_avg:0.642378
Epoch:2 Step:592 Training_loss:0

52it [00:07,  6.60it/s]


Epoch:2 Step:744 Val_loss:0.639265, Val_Acc_avg:61.00%
Epoch:2 Step:752 Training_loss:0.666310, Acc_avg:62.50% Training_loss_avg:0.657265
Epoch:2 Step:760 Training_loss:0.649982, Acc_avg:63.00% Training_loss_avg:0.656633
Epoch:2 Step:768 Training_loss:0.529128, Acc_avg:63.25% Training_loss_avg:0.655777
Epoch:2 Step:776 Training_loss:0.632616, Acc_avg:63.75% Training_loss_avg:0.653912
Epoch:2 Step:784 Training_loss:0.718792, Acc_avg:63.25% Training_loss_avg:0.654893
Epoch:2 Step:792 Training_loss:0.675011, Acc_avg:62.50% Training_loss_avg:0.655313
Epoch:2 Step:800 Training_loss:0.783039, Acc_avg:62.00% Training_loss_avg:0.657246
Epoch:2 Step:808 Training_loss:0.564054, Acc_avg:62.75% Training_loss_avg:0.655199
Epoch:2 Step:816 Training_loss:0.717049, Acc_avg:62.00% Training_loss_avg:0.657672
Epoch:2 Step:824 Training_loss:0.725210, Acc_avg:61.25% Training_loss_avg:0.660461
Epoch:2 Step:832 Training_loss:0.635194, Acc_avg:61.50% Training_loss_avg:0.659320
Epoch:2 Step:840 Training_loss:0

52it [00:07,  6.60it/s]


Epoch:2 Step:992 Val_loss:0.635198, Val_Acc_avg:64.00%
Epoch:2 Step:1000 Training_loss:0.657653, Acc_avg:57.00% Training_loss_avg:0.677412
Epoch:2 Step:1008 Training_loss:0.714033, Acc_avg:57.00% Training_loss_avg:0.675778
Epoch:2 Step:1016 Training_loss:0.640955, Acc_avg:56.75% Training_loss_avg:0.672497
Epoch:2 Step:1024 Training_loss:0.645160, Acc_avg:57.50% Training_loss_avg:0.671041
Epoch:2 Step:1032 Training_loss:0.673368, Acc_avg:57.25% Training_loss_avg:0.671054
Epoch:2 Step:1040 Training_loss:0.643817, Acc_avg:57.25% Training_loss_avg:0.671655
Epoch:2 Step:1048 Training_loss:0.612087, Acc_avg:57.50% Training_loss_avg:0.669183
Epoch:2 Step:1056 Training_loss:0.788553, Acc_avg:56.25% Training_loss_avg:0.674291
Epoch:2 Step:1064 Training_loss:0.694930, Acc_avg:56.50% Training_loss_avg:0.672140
Epoch:2 Step:1072 Training_loss:0.528259, Acc_avg:56.75% Training_loss_avg:0.670656
Epoch:2 Step:1080 Training_loss:0.653364, Acc_avg:57.00% Training_loss_avg:0.669590
Epoch:2 Step:1088 Tra

52it [00:07,  6.60it/s]


Epoch:2 Step:1240 Val_loss:0.637440, Val_Acc_avg:61.00%
Epoch:2 Step:1248 Training_loss:0.684691, Acc_avg:59.25% Training_loss_avg:0.652814
Epoch:2 Step:1256 Training_loss:0.709338, Acc_avg:59.00% Training_loss_avg:0.654313
Epoch:2 Step:1264 Training_loss:0.597030, Acc_avg:59.25% Training_loss_avg:0.652307
Epoch:2 Step:1272 Training_loss:0.601262, Acc_avg:59.75% Training_loss_avg:0.650011
Epoch:2 Step:1280 Training_loss:0.709549, Acc_avg:59.00% Training_loss_avg:0.651896
Epoch:2 Step:1288 Training_loss:0.630122, Acc_avg:59.50% Training_loss_avg:0.650072
Epoch:2 Step:1296 Training_loss:0.753205, Acc_avg:58.25% Training_loss_avg:0.652006
Epoch:2 Step:1304 Training_loss:0.695171, Acc_avg:57.75% Training_loss_avg:0.652531
Epoch:2 Step:1312 Training_loss:0.524317, Acc_avg:58.25% Training_loss_avg:0.650623
Epoch:2 Step:1320 Training_loss:0.604856, Acc_avg:59.25% Training_loss_avg:0.648702
Epoch:2 Step:1328 Training_loss:0.644262, Acc_avg:59.00% Training_loss_avg:0.647901
Epoch:2 Step:1336 Tr

52it [00:07,  6.60it/s]


Epoch:2 Step:1488 Val_loss:0.640457, Val_Acc_avg:60.50%
Epoch:2 Step:1496 Training_loss:0.696830, Acc_avg:61.75% Training_loss_avg:0.644854
Epoch:2 Step:1504 Training_loss:0.869593, Acc_avg:60.75% Training_loss_avg:0.650994
Epoch:2 Step:1512 Training_loss:0.542969, Acc_avg:60.75% Training_loss_avg:0.651547
Epoch:2 Step:1520 Training_loss:0.630970, Acc_avg:60.50% Training_loss_avg:0.651457
Epoch:2 Step:1528 Training_loss:0.513517, Acc_avg:61.00% Training_loss_avg:0.648928
Epoch:2 Step:1536 Training_loss:0.760797, Acc_avg:60.00% Training_loss_avg:0.651542
Epoch:2 Step:1544 Training_loss:0.544580, Acc_avg:61.00% Training_loss_avg:0.646245
Epoch:2 Step:1552 Training_loss:0.632596, Acc_avg:60.75% Training_loss_avg:0.645190
Epoch:2 Step:1560 Training_loss:0.602771, Acc_avg:61.75% Training_loss_avg:0.642061
Epoch:2 Step:1568 Training_loss:0.614537, Acc_avg:61.75% Training_loss_avg:0.642116
Epoch:2 Step:1576 Training_loss:0.542344, Acc_avg:62.50% Training_loss_avg:0.639961
Epoch:2 Step:1584 Tr

52it [00:07,  6.60it/s]


Epoch:2 Step:1736 Val_loss:0.626597, Val_Acc_avg:62.25%
Epoch:2 Step:1744 Training_loss:0.689627, Acc_avg:62.00% Training_loss_avg:0.641061
Epoch:2 Step:1752 Training_loss:0.607630, Acc_avg:61.50% Training_loss_avg:0.639529
Epoch:2 Step:1760 Training_loss:0.711392, Acc_avg:61.50% Training_loss_avg:0.641587
Epoch:2 Step:1768 Training_loss:0.711635, Acc_avg:62.00% Training_loss_avg:0.638153
Epoch:2 Step:1776 Training_loss:0.572959, Acc_avg:62.50% Training_loss_avg:0.636299
Epoch:2 Step:1784 Training_loss:0.775043, Acc_avg:62.25% Training_loss_avg:0.639725
Epoch:2 Step:1792 Training_loss:0.392075, Acc_avg:62.75% Training_loss_avg:0.635502
Epoch:2 Step:1800 Training_loss:0.470004, Acc_avg:62.75% Training_loss_avg:0.632015
Epoch:2 Step:1808 Training_loss:0.700143, Acc_avg:62.00% Training_loss_avg:0.634518
Epoch:2 Step:1816 Training_loss:0.649962, Acc_avg:61.50% Training_loss_avg:0.636004
Epoch:2 Step:1824 Training_loss:0.594503, Acc_avg:61.00% Training_loss_avg:0.636529
Epoch:2 Step:1832 Tr

52it [00:07,  6.59it/s]


Epoch:2 Step:1984 Val_loss:0.637943, Val_Acc_avg:61.00%
Epoch:2 Step:1992 Training_loss:0.776828, Acc_avg:61.25% Training_loss_avg:0.641127
Epoch:2 Step:2000 Training_loss:0.569265, Acc_avg:61.75% Training_loss_avg:0.637166
Epoch:2 Step:2008 Training_loss:0.479328, Acc_avg:62.50% Training_loss_avg:0.630391
Epoch:2 Step:2016 Training_loss:0.476323, Acc_avg:63.50% Training_loss_avg:0.624482
Epoch:2 Step:2024 Training_loss:0.612505, Acc_avg:64.25% Training_loss_avg:0.621935
Epoch:2 Step:2032 Training_loss:0.473861, Acc_avg:65.25% Training_loss_avg:0.617931
Epoch:2 Step:2040 Training_loss:0.482601, Acc_avg:64.75% Training_loss_avg:0.617226
Epoch:2 Step:2048 Training_loss:0.482931, Acc_avg:65.00% Training_loss_avg:0.615591
Epoch:2 Step:2056 Training_loss:0.591185, Acc_avg:65.00% Training_loss_avg:0.614470
Epoch:2 Step:2064 Training_loss:0.927938, Acc_avg:64.25% Training_loss_avg:0.622639
Epoch:2 Step:2072 Training_loss:0.568888, Acc_avg:64.50% Training_loss_avg:0.620814
Epoch:2 Step:2080 Tr

52it [00:07,  6.60it/s]


Epoch:2 Step:2232 Val_loss:0.631375, Val_Acc_avg:61.50%
Epoch:2 Step:2240 Training_loss:0.832997, Acc_avg:63.00% Training_loss_avg:0.645715
Epoch:2 Step:2248 Training_loss:0.791019, Acc_avg:63.00% Training_loss_avg:0.646669
Epoch:2 Step:2256 Training_loss:0.863883, Acc_avg:62.75% Training_loss_avg:0.648104
Epoch:2 Step:2264 Training_loss:0.532735, Acc_avg:63.25% Training_loss_avg:0.647448
Epoch:2 Step:2272 Training_loss:0.556230, Acc_avg:63.25% Training_loss_avg:0.646738
Epoch:2 Step:2280 Training_loss:0.684141, Acc_avg:63.50% Training_loss_avg:0.648289
Epoch:2 Step:2288 Training_loss:0.638413, Acc_avg:63.75% Training_loss_avg:0.647373
Epoch:2 Step:2296 Training_loss:0.797219, Acc_avg:63.25% Training_loss_avg:0.649587
Epoch:2 Step:2304 Training_loss:0.612548, Acc_avg:63.00% Training_loss_avg:0.650710
Epoch:2 Step:2312 Training_loss:0.580174, Acc_avg:63.50% Training_loss_avg:0.647723
Epoch:2 Step:2320 Training_loss:0.766569, Acc_avg:63.25% Training_loss_avg:0.651013
Epoch:2 Step:2328 Tr

52it [00:07,  6.59it/s]


Epoch:2 Step:2480 Val_loss:0.645084, Val_Acc_avg:60.75%
Epoch:2 Step:2488 Training_loss:0.519832, Acc_avg:60.50% Training_loss_avg:0.655711
Epoch:2 Step:2496 Training_loss:0.628449, Acc_avg:60.00% Training_loss_avg:0.659138
Epoch:2 Step:2504 Training_loss:0.770367, Acc_avg:60.00% Training_loss_avg:0.658691
Epoch:2 Step:2512 Training_loss:0.844617, Acc_avg:58.75% Training_loss_avg:0.663853
Epoch:2 Step:2520 Training_loss:0.456652, Acc_avg:59.50% Training_loss_avg:0.654725
Epoch:2 Step:2528 Training_loss:0.671696, Acc_avg:59.25% Training_loss_avg:0.656268
Epoch:2 Step:2536 Training_loss:0.537777, Acc_avg:59.50% Training_loss_avg:0.653095
Epoch:2 Step:2544 Training_loss:0.647130, Acc_avg:60.00% Training_loss_avg:0.646244
Epoch:2 Step:2552 Training_loss:0.671135, Acc_avg:59.75% Training_loss_avg:0.647199
Epoch:2 Step:2560 Training_loss:0.658425, Acc_avg:59.75% Training_loss_avg:0.646201
Epoch:2 Step:2568 Training_loss:0.791925, Acc_avg:60.00% Training_loss_avg:0.646883
Epoch:2 Step:2576 Tr

52it [00:07,  6.59it/s]


Epoch:2 Step:2728 Val_loss:0.611905, Val_Acc_avg:64.25%
Epoch:2 Step:2736 Training_loss:0.751115, Acc_avg:58.25% Training_loss_avg:0.650561
Epoch:2 Step:2744 Training_loss:0.603341, Acc_avg:58.50% Training_loss_avg:0.648823
Epoch:2 Step:2752 Training_loss:0.650649, Acc_avg:58.75% Training_loss_avg:0.649040
Epoch:2 Step:2760 Training_loss:0.719846, Acc_avg:58.75% Training_loss_avg:0.650132
Epoch:2 Step:2768 Training_loss:0.725143, Acc_avg:58.25% Training_loss_avg:0.654380
Epoch:2 Step:2776 Training_loss:0.589044, Acc_avg:58.25% Training_loss_avg:0.653490
Epoch:2 Step:2784 Training_loss:0.577491, Acc_avg:58.25% Training_loss_avg:0.653748
Epoch:2 Step:2792 Training_loss:0.724843, Acc_avg:58.00% Training_loss_avg:0.653112
Epoch:2 Step:2800 Training_loss:0.840433, Acc_avg:56.75% Training_loss_avg:0.659561
Epoch:2 Step:2808 Training_loss:0.436179, Acc_avg:56.75% Training_loss_avg:0.656393
Epoch:2 Step:2816 Training_loss:0.476033, Acc_avg:56.50% Training_loss_avg:0.655799
Epoch:2 Step:2824 Tr

52it [00:07,  6.60it/s]


Epoch:2 Step:2976 Val_loss:0.618306, Val_Acc_avg:63.00%
Epoch:2 Step:2984 Training_loss:0.523133, Acc_avg:60.25% Training_loss_avg:0.646849
Epoch:2 Step:2992 Training_loss:0.660195, Acc_avg:60.25% Training_loss_avg:0.647259
Epoch:2 Step:3000 Training_loss:0.701703, Acc_avg:60.50% Training_loss_avg:0.646799
Epoch:2 Step:3008 Training_loss:0.697297, Acc_avg:60.25% Training_loss_avg:0.646646
Epoch:2 Step:3016 Training_loss:0.654329, Acc_avg:60.00% Training_loss_avg:0.648330
Epoch:2 Step:3024 Training_loss:0.609113, Acc_avg:60.00% Training_loss_avg:0.646956
Epoch:2 Step:3032 Training_loss:0.636620, Acc_avg:60.25% Training_loss_avg:0.645259
Epoch:2 Step:3040 Training_loss:0.554222, Acc_avg:61.00% Training_loss_avg:0.640986
Epoch:2 Step:3048 Training_loss:0.551681, Acc_avg:61.50% Training_loss_avg:0.637933
Epoch:2 Step:3056 Training_loss:0.646593, Acc_avg:61.25% Training_loss_avg:0.640491
Epoch:2 Step:3064 Training_loss:0.633448, Acc_avg:60.50% Training_loss_avg:0.643576
Epoch:2 Step:3072 Tr

52it [00:07,  6.60it/s]


Epoch:2 Step:3224 Val_loss:0.651008, Val_Acc_avg:60.75%
Epoch:2 Step:3232 Training_loss:0.632353, Acc_avg:63.25% Training_loss_avg:0.619488
Epoch:2 Step:3240 Training_loss:0.641593, Acc_avg:63.25% Training_loss_avg:0.620636
Epoch:2 Step:3248 Training_loss:0.463937, Acc_avg:63.25% Training_loss_avg:0.615727
Epoch:2 Step:3256 Training_loss:0.904933, Acc_avg:62.75% Training_loss_avg:0.617906
Epoch:2 Step:3264 Training_loss:0.667053, Acc_avg:62.00% Training_loss_avg:0.619900
Epoch:2 Step:3272 Training_loss:0.628160, Acc_avg:61.75% Training_loss_avg:0.619573
Epoch:2 Step:3280 Training_loss:0.715389, Acc_avg:61.50% Training_loss_avg:0.620784
Epoch:2 Step:3288 Training_loss:0.837750, Acc_avg:61.25% Training_loss_avg:0.623990
Epoch:2 Step:3296 Training_loss:0.590619, Acc_avg:61.25% Training_loss_avg:0.625084
Epoch:2 Step:3304 Training_loss:0.640610, Acc_avg:61.50% Training_loss_avg:0.622639
Epoch:2 Step:3312 Training_loss:0.393041, Acc_avg:62.50% Training_loss_avg:0.617525
Epoch:2 Step:3320 Tr

52it [00:07,  6.60it/s]


Epoch:3 Step:0 Val_loss:0.619924, Val_Acc_avg:61.50%
Epoch:3 Step:8 Training_loss:0.506097, Acc_avg:63.25% Training_loss_avg:0.615748
Epoch:3 Step:16 Training_loss:0.670680, Acc_avg:62.75% Training_loss_avg:0.617635
Epoch:3 Step:24 Training_loss:0.674736, Acc_avg:63.00% Training_loss_avg:0.617922
Epoch:3 Step:32 Training_loss:0.744041, Acc_avg:63.00% Training_loss_avg:0.619208
Epoch:3 Step:40 Training_loss:0.619308, Acc_avg:62.50% Training_loss_avg:0.619683
Epoch:3 Step:48 Training_loss:0.622827, Acc_avg:62.25% Training_loss_avg:0.621677
Epoch:3 Step:56 Training_loss:0.754976, Acc_avg:62.25% Training_loss_avg:0.623573
Epoch:3 Step:64 Training_loss:0.530661, Acc_avg:62.75% Training_loss_avg:0.620152
Epoch:3 Step:72 Training_loss:0.492735, Acc_avg:63.25% Training_loss_avg:0.616061
Epoch:3 Step:80 Training_loss:0.659962, Acc_avg:63.25% Training_loss_avg:0.616173
Epoch:3 Step:88 Training_loss:0.592176, Acc_avg:63.50% Training_loss_avg:0.615835
Epoch:3 Step:96 Training_loss:0.397239, Acc_av

52it [00:07,  6.59it/s]


Epoch:3 Step:248 Val_loss:0.615007, Val_Acc_avg:62.75%
Epoch:3 Step:256 Training_loss:0.589949, Acc_avg:64.00% Training_loss_avg:0.612895
Epoch:3 Step:264 Training_loss:0.636292, Acc_avg:64.25% Training_loss_avg:0.612068
Epoch:3 Step:272 Training_loss:0.533507, Acc_avg:64.75% Training_loss_avg:0.611720
Epoch:3 Step:280 Training_loss:0.503253, Acc_avg:64.75% Training_loss_avg:0.610314
Epoch:3 Step:288 Training_loss:0.694299, Acc_avg:64.00% Training_loss_avg:0.613659
Epoch:3 Step:296 Training_loss:0.574656, Acc_avg:64.00% Training_loss_avg:0.612505
Epoch:3 Step:304 Training_loss:0.501104, Acc_avg:64.50% Training_loss_avg:0.609695
Epoch:3 Step:312 Training_loss:0.736399, Acc_avg:64.75% Training_loss_avg:0.615144
Epoch:3 Step:320 Training_loss:0.638013, Acc_avg:65.50% Training_loss_avg:0.609806
Epoch:3 Step:328 Training_loss:0.600541, Acc_avg:66.00% Training_loss_avg:0.608475
Epoch:3 Step:336 Training_loss:0.648783, Acc_avg:66.00% Training_loss_avg:0.608888
Epoch:3 Step:344 Training_loss:0

52it [00:07,  6.59it/s]


Epoch:3 Step:496 Val_loss:0.609528, Val_Acc_avg:63.25%
Epoch:3 Step:504 Training_loss:0.537651, Acc_avg:63.00% Training_loss_avg:0.617115
Epoch:3 Step:512 Training_loss:0.545126, Acc_avg:63.50% Training_loss_avg:0.612086
Epoch:3 Step:520 Training_loss:0.667391, Acc_avg:62.75% Training_loss_avg:0.613586
Epoch:3 Step:528 Training_loss:0.507566, Acc_avg:63.00% Training_loss_avg:0.610679
Epoch:3 Step:536 Training_loss:0.604533, Acc_avg:63.50% Training_loss_avg:0.609651
Epoch:3 Step:544 Training_loss:0.568530, Acc_avg:63.50% Training_loss_avg:0.606493
Epoch:3 Step:552 Training_loss:0.469686, Acc_avg:64.00% Training_loss_avg:0.605537
Epoch:3 Step:560 Training_loss:0.693282, Acc_avg:63.25% Training_loss_avg:0.611356
Epoch:3 Step:568 Training_loss:0.630133, Acc_avg:63.50% Training_loss_avg:0.611637
Epoch:3 Step:576 Training_loss:0.411837, Acc_avg:64.25% Training_loss_avg:0.606852
Epoch:3 Step:584 Training_loss:0.476085, Acc_avg:65.00% Training_loss_avg:0.604077
Epoch:3 Step:592 Training_loss:0

52it [00:07,  6.59it/s]


Epoch:3 Step:744 Val_loss:0.628151, Val_Acc_avg:61.25%
Epoch:3 Step:752 Training_loss:0.552952, Acc_avg:62.25% Training_loss_avg:0.613721
Epoch:3 Step:760 Training_loss:0.523092, Acc_avg:62.50% Training_loss_avg:0.612878
Epoch:3 Step:768 Training_loss:0.656576, Acc_avg:61.75% Training_loss_avg:0.615004
Epoch:3 Step:776 Training_loss:0.713751, Acc_avg:62.75% Training_loss_avg:0.613532
Epoch:3 Step:784 Training_loss:0.713022, Acc_avg:62.75% Training_loss_avg:0.610267
Epoch:3 Step:792 Training_loss:0.544779, Acc_avg:62.75% Training_loss_avg:0.610293
Epoch:3 Step:800 Training_loss:0.625701, Acc_avg:63.00% Training_loss_avg:0.611183
Epoch:3 Step:808 Training_loss:0.656651, Acc_avg:62.75% Training_loss_avg:0.613259
Epoch:3 Step:816 Training_loss:0.713141, Acc_avg:62.50% Training_loss_avg:0.615119
Epoch:3 Step:824 Training_loss:0.552770, Acc_avg:63.25% Training_loss_avg:0.610360
Epoch:3 Step:832 Training_loss:0.649768, Acc_avg:64.25% Training_loss_avg:0.608044
Epoch:3 Step:840 Training_loss:0

52it [00:07,  6.58it/s]


Epoch:3 Step:992 Val_loss:0.630931, Val_Acc_avg:61.25%
Epoch:3 Step:1000 Training_loss:0.514763, Acc_avg:63.00% Training_loss_avg:0.615714
Epoch:3 Step:1008 Training_loss:0.695984, Acc_avg:62.25% Training_loss_avg:0.620151
Epoch:3 Step:1016 Training_loss:0.564519, Acc_avg:63.00% Training_loss_avg:0.618605
Epoch:3 Step:1024 Training_loss:0.540069, Acc_avg:63.50% Training_loss_avg:0.617827
Epoch:3 Step:1032 Training_loss:0.722346, Acc_avg:63.25% Training_loss_avg:0.619864
Epoch:3 Step:1040 Training_loss:0.457146, Acc_avg:63.50% Training_loss_avg:0.617507
Epoch:3 Step:1048 Training_loss:0.616402, Acc_avg:64.00% Training_loss_avg:0.611541
Epoch:3 Step:1056 Training_loss:0.442409, Acc_avg:64.25% Training_loss_avg:0.607617
Epoch:3 Step:1064 Training_loss:0.476027, Acc_avg:64.25% Training_loss_avg:0.605157
Epoch:3 Step:1072 Training_loss:0.626600, Acc_avg:63.75% Training_loss_avg:0.604165
Epoch:3 Step:1080 Training_loss:0.673624, Acc_avg:63.50% Training_loss_avg:0.607375
Epoch:3 Step:1088 Tra

52it [00:07,  6.59it/s]


Epoch:3 Step:1240 Val_loss:0.614810, Val_Acc_avg:63.00%
Epoch:3 Step:1248 Training_loss:0.454334, Acc_avg:65.25% Training_loss_avg:0.603014
Epoch:3 Step:1256 Training_loss:0.605201, Acc_avg:65.00% Training_loss_avg:0.600806
Epoch:3 Step:1264 Training_loss:0.818556, Acc_avg:64.75% Training_loss_avg:0.606093
Epoch:3 Step:1272 Training_loss:0.640336, Acc_avg:64.75% Training_loss_avg:0.606213
Epoch:3 Step:1280 Training_loss:0.780769, Acc_avg:63.50% Training_loss_avg:0.611519
Epoch:3 Step:1288 Training_loss:0.732780, Acc_avg:62.50% Training_loss_avg:0.614704
Epoch:3 Step:1296 Training_loss:0.617710, Acc_avg:62.25% Training_loss_avg:0.616456
Epoch:3 Step:1304 Training_loss:0.652094, Acc_avg:61.75% Training_loss_avg:0.617571
Epoch:3 Step:1312 Training_loss:0.650520, Acc_avg:61.00% Training_loss_avg:0.619455
Epoch:3 Step:1320 Training_loss:0.718240, Acc_avg:60.50% Training_loss_avg:0.621320
Epoch:3 Step:1328 Training_loss:0.716765, Acc_avg:60.00% Training_loss_avg:0.624437
Epoch:3 Step:1336 Tr

52it [00:07,  6.61it/s]


Epoch:3 Step:1488 Val_loss:0.624381, Val_Acc_avg:61.75%
Epoch:3 Step:1496 Training_loss:0.663206, Acc_avg:60.75% Training_loss_avg:0.627787
Epoch:3 Step:1504 Training_loss:0.729999, Acc_avg:61.00% Training_loss_avg:0.629379
Epoch:3 Step:1512 Training_loss:0.620032, Acc_avg:61.25% Training_loss_avg:0.628281
Epoch:3 Step:1520 Training_loss:0.582218, Acc_avg:61.00% Training_loss_avg:0.628789
Epoch:3 Step:1528 Training_loss:0.647258, Acc_avg:60.25% Training_loss_avg:0.631612
Epoch:3 Step:1536 Training_loss:0.647776, Acc_avg:60.75% Training_loss_avg:0.631498
Epoch:3 Step:1544 Training_loss:0.618404, Acc_avg:60.75% Training_loss_avg:0.632956
Epoch:3 Step:1552 Training_loss:0.610300, Acc_avg:60.50% Training_loss_avg:0.632192
Epoch:3 Step:1560 Training_loss:0.644879, Acc_avg:60.00% Training_loss_avg:0.633741
Epoch:3 Step:1568 Training_loss:0.584515, Acc_avg:59.50% Training_loss_avg:0.636316
Epoch:3 Step:1576 Training_loss:0.569573, Acc_avg:60.25% Training_loss_avg:0.631347
Epoch:3 Step:1584 Tr

52it [00:07,  6.60it/s]


Epoch:3 Step:1736 Val_loss:0.601111, Val_Acc_avg:64.50%
Epoch:3 Step:1744 Training_loss:0.411607, Acc_avg:66.50% Training_loss_avg:0.615071
Epoch:3 Step:1752 Training_loss:0.512017, Acc_avg:66.75% Training_loss_avg:0.612572
Epoch:3 Step:1760 Training_loss:0.705350, Acc_avg:66.75% Training_loss_avg:0.615462
Epoch:3 Step:1768 Training_loss:0.505049, Acc_avg:67.00% Training_loss_avg:0.612002
Epoch:3 Step:1776 Training_loss:0.629922, Acc_avg:66.75% Training_loss_avg:0.612796
Epoch:3 Step:1784 Training_loss:0.601538, Acc_avg:66.75% Training_loss_avg:0.614989
Epoch:3 Step:1792 Training_loss:0.381571, Acc_avg:67.25% Training_loss_avg:0.608966
Epoch:3 Step:1800 Training_loss:0.369848, Acc_avg:68.00% Training_loss_avg:0.602000
Epoch:3 Step:1808 Training_loss:0.660263, Acc_avg:67.75% Training_loss_avg:0.603288
Epoch:3 Step:1816 Training_loss:0.634373, Acc_avg:67.25% Training_loss_avg:0.604226
Epoch:3 Step:1824 Training_loss:0.561856, Acc_avg:67.50% Training_loss_avg:0.602341
Epoch:3 Step:1832 Tr

52it [00:07,  6.60it/s]


Epoch:3 Step:1984 Val_loss:0.610588, Val_Acc_avg:63.50%
Epoch:3 Step:1992 Training_loss:0.627520, Acc_avg:66.25% Training_loss_avg:0.605811
Epoch:3 Step:2000 Training_loss:0.739562, Acc_avg:66.00% Training_loss_avg:0.609310
Epoch:3 Step:2008 Training_loss:0.581740, Acc_avg:66.50% Training_loss_avg:0.608191
Epoch:3 Step:2016 Training_loss:0.675741, Acc_avg:66.75% Training_loss_avg:0.608335
Epoch:3 Step:2024 Training_loss:0.760727, Acc_avg:66.25% Training_loss_avg:0.613022
Epoch:3 Step:2032 Training_loss:0.521168, Acc_avg:66.75% Training_loss_avg:0.610622
Epoch:3 Step:2040 Training_loss:0.426235, Acc_avg:66.75% Training_loss_avg:0.607335
Epoch:3 Step:2048 Training_loss:0.575513, Acc_avg:66.75% Training_loss_avg:0.605502
Epoch:3 Step:2056 Training_loss:0.551659, Acc_avg:66.25% Training_loss_avg:0.606156
Epoch:3 Step:2064 Training_loss:0.447071, Acc_avg:66.50% Training_loss_avg:0.602784
Epoch:3 Step:2072 Training_loss:0.752219, Acc_avg:66.25% Training_loss_avg:0.604576
Epoch:3 Step:2080 Tr

52it [00:07,  6.60it/s]


Epoch:3 Step:2232 Val_loss:0.642983, Val_Acc_avg:61.25%
Epoch:3 Step:2240 Training_loss:0.622493, Acc_avg:66.00% Training_loss_avg:0.609612
Epoch:3 Step:2248 Training_loss:0.655407, Acc_avg:66.00% Training_loss_avg:0.611095
Epoch:3 Step:2256 Training_loss:0.683462, Acc_avg:66.25% Training_loss_avg:0.611609
Epoch:3 Step:2264 Training_loss:0.542995, Acc_avg:66.00% Training_loss_avg:0.610229
Epoch:3 Step:2272 Training_loss:0.695549, Acc_avg:65.50% Training_loss_avg:0.613982
Epoch:3 Step:2280 Training_loss:0.471307, Acc_avg:66.50% Training_loss_avg:0.609596
Epoch:3 Step:2288 Training_loss:0.489521, Acc_avg:66.75% Training_loss_avg:0.611479
Epoch:3 Step:2296 Training_loss:0.507381, Acc_avg:66.50% Training_loss_avg:0.612487
Epoch:3 Step:2304 Training_loss:0.558459, Acc_avg:67.00% Training_loss_avg:0.609327
Epoch:3 Step:2312 Training_loss:0.445850, Acc_avg:67.50% Training_loss_avg:0.607115
Epoch:3 Step:2320 Training_loss:0.708072, Acc_avg:67.50% Training_loss_avg:0.607050
Epoch:3 Step:2328 Tr

52it [00:07,  6.60it/s]


Epoch:3 Step:2480 Val_loss:0.656195, Val_Acc_avg:61.50%
Epoch:3 Step:2488 Training_loss:0.956784, Acc_avg:67.50% Training_loss_avg:0.617961
Epoch:3 Step:2496 Training_loss:0.707140, Acc_avg:67.50% Training_loss_avg:0.617024
Epoch:3 Step:2504 Training_loss:0.652036, Acc_avg:66.75% Training_loss_avg:0.617875
Epoch:3 Step:2512 Training_loss:0.797718, Acc_avg:66.25% Training_loss_avg:0.620667
Epoch:3 Step:2520 Training_loss:0.390537, Acc_avg:67.50% Training_loss_avg:0.602951
Epoch:3 Step:2528 Training_loss:0.687554, Acc_avg:67.25% Training_loss_avg:0.601123
Epoch:3 Step:2536 Training_loss:0.310565, Acc_avg:67.75% Training_loss_avg:0.594441
Epoch:3 Step:2544 Training_loss:0.579448, Acc_avg:67.25% Training_loss_avg:0.595238
Epoch:3 Step:2552 Training_loss:0.575855, Acc_avg:67.00% Training_loss_avg:0.595109
Epoch:3 Step:2560 Training_loss:0.578367, Acc_avg:67.00% Training_loss_avg:0.596190
Epoch:3 Step:2568 Training_loss:0.481230, Acc_avg:67.00% Training_loss_avg:0.597081
Epoch:3 Step:2576 Tr

52it [00:07,  6.60it/s]


Epoch:3 Step:2728 Val_loss:0.671119, Val_Acc_avg:62.00%
Epoch:3 Step:2736 Training_loss:0.478246, Acc_avg:65.50% Training_loss_avg:0.606921
Epoch:3 Step:2744 Training_loss:0.643379, Acc_avg:65.50% Training_loss_avg:0.607774
Epoch:3 Step:2752 Training_loss:0.765436, Acc_avg:65.50% Training_loss_avg:0.609073
Epoch:3 Step:2760 Training_loss:0.751441, Acc_avg:65.25% Training_loss_avg:0.612641
Epoch:3 Step:2768 Training_loss:0.690633, Acc_avg:65.00% Training_loss_avg:0.613990
Epoch:3 Step:2776 Training_loss:0.458710, Acc_avg:64.75% Training_loss_avg:0.612723
Epoch:3 Step:2784 Training_loss:0.592322, Acc_avg:64.75% Training_loss_avg:0.612525
Epoch:3 Step:2792 Training_loss:0.575602, Acc_avg:64.25% Training_loss_avg:0.613297
Epoch:3 Step:2800 Training_loss:0.654432, Acc_avg:64.00% Training_loss_avg:0.616286
Epoch:3 Step:2808 Training_loss:0.572287, Acc_avg:64.25% Training_loss_avg:0.611267
Epoch:3 Step:2816 Training_loss:0.487838, Acc_avg:64.50% Training_loss_avg:0.609408
Epoch:3 Step:2824 Tr

52it [00:07,  6.60it/s]


Epoch:3 Step:2976 Val_loss:0.653967, Val_Acc_avg:61.00%
Epoch:3 Step:2984 Training_loss:0.785879, Acc_avg:67.00% Training_loss_avg:0.604647
Epoch:3 Step:2992 Training_loss:0.885027, Acc_avg:67.00% Training_loss_avg:0.607627
Epoch:3 Step:3000 Training_loss:0.476364, Acc_avg:67.50% Training_loss_avg:0.602413
Epoch:3 Step:3008 Training_loss:0.546218, Acc_avg:68.25% Training_loss_avg:0.596039
Epoch:3 Step:3016 Training_loss:0.672829, Acc_avg:68.00% Training_loss_avg:0.595656
Epoch:3 Step:3024 Training_loss:0.864699, Acc_avg:67.75% Training_loss_avg:0.596955
Epoch:3 Step:3032 Training_loss:0.730464, Acc_avg:68.00% Training_loss_avg:0.596050
Epoch:3 Step:3040 Training_loss:0.809975, Acc_avg:67.00% Training_loss_avg:0.601922
Epoch:3 Step:3048 Training_loss:0.621974, Acc_avg:66.50% Training_loss_avg:0.606975
Epoch:3 Step:3056 Training_loss:0.693864, Acc_avg:66.00% Training_loss_avg:0.610851
Epoch:3 Step:3064 Training_loss:0.777040, Acc_avg:65.25% Training_loss_avg:0.618612
Epoch:3 Step:3072 Tr

52it [00:07,  6.61it/s]


Epoch:3 Step:3224 Val_loss:0.611794, Val_Acc_avg:63.25%
Epoch:3 Step:3232 Training_loss:0.486050, Acc_avg:66.25% Training_loss_avg:0.623561
Epoch:3 Step:3240 Training_loss:0.617754, Acc_avg:66.50% Training_loss_avg:0.623103
Epoch:3 Step:3248 Training_loss:0.806287, Acc_avg:66.25% Training_loss_avg:0.624482
Epoch:3 Step:3256 Training_loss:0.531361, Acc_avg:66.75% Training_loss_avg:0.625256
Epoch:3 Step:3264 Training_loss:0.602097, Acc_avg:67.00% Training_loss_avg:0.624020
Epoch:3 Step:3272 Training_loss:0.500529, Acc_avg:67.00% Training_loss_avg:0.622493
Epoch:3 Step:3280 Training_loss:0.443621, Acc_avg:67.50% Training_loss_avg:0.620411
Epoch:3 Step:3288 Training_loss:0.796502, Acc_avg:67.00% Training_loss_avg:0.625238
Epoch:3 Step:3296 Training_loss:0.763289, Acc_avg:66.50% Training_loss_avg:0.629168
Epoch:3 Step:3304 Training_loss:0.680711, Acc_avg:65.75% Training_loss_avg:0.633488
Epoch:3 Step:3312 Training_loss:0.489009, Acc_avg:65.50% Training_loss_avg:0.633048
Epoch:3 Step:3320 Tr

52it [00:07,  6.60it/s]


Epoch:4 Step:0 Val_loss:0.619098, Val_Acc_avg:62.75%
Epoch:4 Step:8 Training_loss:0.593604, Acc_avg:64.58% Training_loss_avg:0.631396
Epoch:4 Step:16 Training_loss:0.522644, Acc_avg:64.83% Training_loss_avg:0.631394
Epoch:4 Step:24 Training_loss:0.602938, Acc_avg:65.08% Training_loss_avg:0.631508
Epoch:4 Step:32 Training_loss:0.913737, Acc_avg:64.33% Training_loss_avg:0.634676
Epoch:4 Step:40 Training_loss:0.484904, Acc_avg:64.58% Training_loss_avg:0.633527
Epoch:4 Step:48 Training_loss:0.807441, Acc_avg:64.08% Training_loss_avg:0.633958
Epoch:4 Step:56 Training_loss:0.745204, Acc_avg:64.08% Training_loss_avg:0.631162
Epoch:4 Step:64 Training_loss:0.862421, Acc_avg:63.08% Training_loss_avg:0.638883
Epoch:4 Step:72 Training_loss:0.447959, Acc_avg:63.33% Training_loss_avg:0.636918
Epoch:4 Step:80 Training_loss:0.619011, Acc_avg:63.33% Training_loss_avg:0.635841
Epoch:4 Step:88 Training_loss:0.423735, Acc_avg:64.08% Training_loss_avg:0.627022
Epoch:4 Step:96 Training_loss:0.437929, Acc_av

52it [00:07,  6.61it/s]


Epoch:4 Step:248 Val_loss:0.609722, Val_Acc_avg:62.25%
Epoch:4 Step:256 Training_loss:0.366894, Acc_avg:67.33% Training_loss_avg:0.586654
Epoch:4 Step:264 Training_loss:0.434152, Acc_avg:67.33% Training_loss_avg:0.582577
Epoch:4 Step:272 Training_loss:0.457182, Acc_avg:67.58% Training_loss_avg:0.579330
Epoch:4 Step:280 Training_loss:0.549323, Acc_avg:67.58% Training_loss_avg:0.580207
Epoch:4 Step:288 Training_loss:0.591342, Acc_avg:67.08% Training_loss_avg:0.581968
Epoch:4 Step:296 Training_loss:0.554012, Acc_avg:66.83% Training_loss_avg:0.583327
Epoch:4 Step:304 Training_loss:0.656700, Acc_avg:66.83% Training_loss_avg:0.584106
Epoch:4 Step:312 Training_loss:0.611790, Acc_avg:67.08% Training_loss_avg:0.580216
Epoch:4 Step:320 Training_loss:0.481447, Acc_avg:66.58% Training_loss_avg:0.579218
Epoch:4 Step:328 Training_loss:0.585594, Acc_avg:66.83% Training_loss_avg:0.578888
Epoch:4 Step:336 Training_loss:0.549053, Acc_avg:66.58% Training_loss_avg:0.579858
Epoch:4 Step:344 Training_loss:0

52it [00:07,  6.59it/s]


Epoch:4 Step:496 Val_loss:0.595495, Val_Acc_avg:65.00%
Epoch:4 Step:504 Training_loss:0.453672, Acc_avg:64.75% Training_loss_avg:0.575873
Epoch:4 Step:512 Training_loss:0.489075, Acc_avg:65.00% Training_loss_avg:0.572719
Epoch:4 Step:520 Training_loss:0.685527, Acc_avg:64.50% Training_loss_avg:0.573745
Epoch:4 Step:528 Training_loss:0.480933, Acc_avg:64.00% Training_loss_avg:0.575717
Epoch:4 Step:536 Training_loss:0.486164, Acc_avg:64.50% Training_loss_avg:0.571916
Epoch:4 Step:544 Training_loss:0.523184, Acc_avg:64.25% Training_loss_avg:0.570123
Epoch:4 Step:552 Training_loss:0.764245, Acc_avg:63.50% Training_loss_avg:0.575803
Epoch:4 Step:560 Training_loss:0.552671, Acc_avg:63.75% Training_loss_avg:0.577527
Epoch:4 Step:568 Training_loss:0.540816, Acc_avg:63.75% Training_loss_avg:0.577673
Epoch:4 Step:576 Training_loss:0.466184, Acc_avg:64.50% Training_loss_avg:0.573680
Epoch:4 Step:584 Training_loss:0.591001, Acc_avg:65.00% Training_loss_avg:0.570831
Epoch:4 Step:592 Training_loss:0

52it [00:07,  6.59it/s]


Epoch:4 Step:744 Val_loss:0.668316, Val_Acc_avg:62.00%
Epoch:4 Step:752 Training_loss:0.922694, Acc_avg:64.25% Training_loss_avg:0.600271
Epoch:4 Step:760 Training_loss:0.768027, Acc_avg:64.75% Training_loss_avg:0.602395
Epoch:4 Step:768 Training_loss:0.571298, Acc_avg:64.75% Training_loss_avg:0.603094
Epoch:4 Step:776 Training_loss:0.680366, Acc_avg:65.75% Training_loss_avg:0.599447
Epoch:4 Step:784 Training_loss:0.555312, Acc_avg:66.00% Training_loss_avg:0.597686
Epoch:4 Step:792 Training_loss:0.660242, Acc_avg:66.00% Training_loss_avg:0.600168
Epoch:4 Step:800 Training_loss:0.385088, Acc_avg:66.75% Training_loss_avg:0.596357
Epoch:4 Step:808 Training_loss:0.564761, Acc_avg:66.75% Training_loss_avg:0.595860
Epoch:4 Step:816 Training_loss:0.448481, Acc_avg:67.25% Training_loss_avg:0.592404
Epoch:4 Step:824 Training_loss:0.807679, Acc_avg:66.50% Training_loss_avg:0.597884
Epoch:4 Step:832 Training_loss:0.745680, Acc_avg:66.00% Training_loss_avg:0.600777
Epoch:4 Step:840 Training_loss:0

52it [00:07,  6.60it/s]


Epoch:4 Step:992 Val_loss:0.602248, Val_Acc_avg:63.25%
Epoch:4 Step:1000 Training_loss:0.565582, Acc_avg:66.25% Training_loss_avg:0.611945
Epoch:4 Step:1008 Training_loss:1.049555, Acc_avg:65.50% Training_loss_avg:0.620435
Epoch:4 Step:1016 Training_loss:0.401065, Acc_avg:65.50% Training_loss_avg:0.617570
Epoch:4 Step:1024 Training_loss:0.571672, Acc_avg:65.75% Training_loss_avg:0.617208
Epoch:4 Step:1032 Training_loss:0.514805, Acc_avg:65.75% Training_loss_avg:0.616347
Epoch:4 Step:1040 Training_loss:0.431105, Acc_avg:66.75% Training_loss_avg:0.609342
Epoch:4 Step:1048 Training_loss:0.582802, Acc_avg:66.25% Training_loss_avg:0.610469
Epoch:4 Step:1056 Training_loss:0.625747, Acc_avg:65.50% Training_loss_avg:0.610887
Epoch:4 Step:1064 Training_loss:0.626103, Acc_avg:65.25% Training_loss_avg:0.608382
Epoch:4 Step:1072 Training_loss:0.410968, Acc_avg:65.50% Training_loss_avg:0.607102
Epoch:4 Step:1080 Training_loss:0.505820, Acc_avg:65.75% Training_loss_avg:0.606666
Epoch:4 Step:1088 Tra

52it [00:07,  6.60it/s]


Epoch:4 Step:1240 Val_loss:0.596204, Val_Acc_avg:65.25%
Epoch:4 Step:1248 Training_loss:0.379117, Acc_avg:67.50% Training_loss_avg:0.581598
Epoch:4 Step:1256 Training_loss:0.378010, Acc_avg:68.00% Training_loss_avg:0.577157
Epoch:4 Step:1264 Training_loss:0.452503, Acc_avg:68.00% Training_loss_avg:0.573583
Epoch:4 Step:1272 Training_loss:0.467534, Acc_avg:68.25% Training_loss_avg:0.570926
Epoch:4 Step:1280 Training_loss:0.570065, Acc_avg:68.00% Training_loss_avg:0.572353
Epoch:4 Step:1288 Training_loss:0.788560, Acc_avg:68.00% Training_loss_avg:0.573107
Epoch:4 Step:1296 Training_loss:0.406749, Acc_avg:68.50% Training_loss_avg:0.569833
Epoch:4 Step:1304 Training_loss:0.778925, Acc_avg:68.75% Training_loss_avg:0.569219
Epoch:4 Step:1312 Training_loss:0.609969, Acc_avg:69.00% Training_loss_avg:0.569467
Epoch:4 Step:1320 Training_loss:0.675253, Acc_avg:69.25% Training_loss_avg:0.572450
Epoch:4 Step:1328 Training_loss:0.626695, Acc_avg:69.25% Training_loss_avg:0.573877
Epoch:4 Step:1336 Tr

52it [00:07,  6.61it/s]


Epoch:4 Step:1488 Val_loss:0.658630, Val_Acc_avg:61.25%
Epoch:4 Step:1496 Training_loss:0.581979, Acc_avg:68.00% Training_loss_avg:0.591126
Epoch:4 Step:1504 Training_loss:0.540614, Acc_avg:67.75% Training_loss_avg:0.591433
Epoch:4 Step:1512 Training_loss:0.497744, Acc_avg:68.00% Training_loss_avg:0.586522
Epoch:4 Step:1520 Training_loss:0.564577, Acc_avg:68.00% Training_loss_avg:0.585105
Epoch:4 Step:1528 Training_loss:0.678117, Acc_avg:68.00% Training_loss_avg:0.589875
Epoch:4 Step:1536 Training_loss:0.602918, Acc_avg:68.50% Training_loss_avg:0.586357
Epoch:4 Step:1544 Training_loss:0.496082, Acc_avg:68.50% Training_loss_avg:0.582275
Epoch:4 Step:1552 Training_loss:0.397726, Acc_avg:69.50% Training_loss_avg:0.573138
Epoch:4 Step:1560 Training_loss:0.688237, Acc_avg:69.00% Training_loss_avg:0.573671
Epoch:4 Step:1568 Training_loss:0.796919, Acc_avg:68.75% Training_loss_avg:0.581294
Epoch:4 Step:1576 Training_loss:0.508922, Acc_avg:68.50% Training_loss_avg:0.585128
Epoch:4 Step:1584 Tr

52it [00:07,  6.60it/s]


Epoch:4 Step:1736 Val_loss:0.664251, Val_Acc_avg:61.25%
Epoch:4 Step:1744 Training_loss:0.845736, Acc_avg:65.75% Training_loss_avg:0.597292
Epoch:4 Step:1752 Training_loss:0.731308, Acc_avg:65.25% Training_loss_avg:0.601730
Epoch:4 Step:1760 Training_loss:0.595000, Acc_avg:65.25% Training_loss_avg:0.603678
Epoch:4 Step:1768 Training_loss:0.313607, Acc_avg:65.75% Training_loss_avg:0.596691
Epoch:4 Step:1776 Training_loss:0.639755, Acc_avg:66.00% Training_loss_avg:0.594317
Epoch:4 Step:1784 Training_loss:0.845015, Acc_avg:65.50% Training_loss_avg:0.603768
Epoch:4 Step:1792 Training_loss:0.586666, Acc_avg:66.00% Training_loss_avg:0.596470
Epoch:4 Step:1800 Training_loss:0.506223, Acc_avg:65.75% Training_loss_avg:0.599007
Epoch:4 Step:1808 Training_loss:0.348527, Acc_avg:67.00% Training_loss_avg:0.588347
Epoch:4 Step:1816 Training_loss:0.493052, Acc_avg:67.25% Training_loss_avg:0.582755
Epoch:4 Step:1824 Training_loss:0.555119, Acc_avg:68.00% Training_loss_avg:0.579930
Epoch:4 Step:1832 Tr

52it [00:07,  6.61it/s]


Epoch:4 Step:1984 Val_loss:0.598995, Val_Acc_avg:65.00%
Epoch:4 Step:1992 Training_loss:0.467304, Acc_avg:67.00% Training_loss_avg:0.587400
Epoch:4 Step:2000 Training_loss:0.388438, Acc_avg:66.75% Training_loss_avg:0.588253
Epoch:4 Step:2008 Training_loss:0.506624, Acc_avg:67.50% Training_loss_avg:0.582989
Epoch:4 Step:2016 Training_loss:0.538349, Acc_avg:67.75% Training_loss_avg:0.581183
Epoch:4 Step:2024 Training_loss:0.603880, Acc_avg:67.75% Training_loss_avg:0.580733
Epoch:4 Step:2032 Training_loss:0.374009, Acc_avg:68.25% Training_loss_avg:0.577851
Epoch:4 Step:2040 Training_loss:0.697298, Acc_avg:68.25% Training_loss_avg:0.580994
Epoch:4 Step:2048 Training_loss:0.784366, Acc_avg:67.50% Training_loss_avg:0.584646
Epoch:4 Step:2056 Training_loss:0.506725, Acc_avg:68.00% Training_loss_avg:0.583594
Epoch:4 Step:2064 Training_loss:0.617442, Acc_avg:67.75% Training_loss_avg:0.584871
Epoch:4 Step:2072 Training_loss:0.544316, Acc_avg:67.75% Training_loss_avg:0.584154
Epoch:4 Step:2080 Tr

52it [00:07,  6.60it/s]


Epoch:4 Step:2232 Val_loss:0.621288, Val_Acc_avg:64.00%
Epoch:4 Step:2240 Training_loss:0.403571, Acc_avg:71.00% Training_loss_avg:0.554293
Epoch:4 Step:2248 Training_loss:0.561531, Acc_avg:70.75% Training_loss_avg:0.555164
Epoch:4 Step:2256 Training_loss:0.422216, Acc_avg:70.75% Training_loss_avg:0.552019
Epoch:4 Step:2264 Training_loss:0.494383, Acc_avg:70.75% Training_loss_avg:0.550159
Epoch:4 Step:2272 Training_loss:0.491439, Acc_avg:71.25% Training_loss_avg:0.546738
Epoch:4 Step:2280 Training_loss:0.558225, Acc_avg:71.00% Training_loss_avg:0.546080
Epoch:4 Step:2288 Training_loss:0.721873, Acc_avg:71.00% Training_loss_avg:0.549487
Epoch:4 Step:2296 Training_loss:0.323733, Acc_avg:71.75% Training_loss_avg:0.544303
Epoch:4 Step:2304 Training_loss:0.546959, Acc_avg:72.00% Training_loss_avg:0.541453
Epoch:4 Step:2312 Training_loss:0.869623, Acc_avg:71.00% Training_loss_avg:0.549535
Epoch:4 Step:2320 Training_loss:0.691724, Acc_avg:71.25% Training_loss_avg:0.551198
Epoch:4 Step:2328 Tr

52it [00:07,  6.61it/s]


Epoch:4 Step:2480 Val_loss:0.643476, Val_Acc_avg:64.25%
Epoch:4 Step:2488 Training_loss:0.398714, Acc_avg:73.25% Training_loss_avg:0.540055
Epoch:4 Step:2496 Training_loss:0.800660, Acc_avg:72.50% Training_loss_avg:0.546269
Epoch:4 Step:2504 Training_loss:0.589434, Acc_avg:72.75% Training_loss_avg:0.543645
Epoch:4 Step:2512 Training_loss:0.573930, Acc_avg:72.25% Training_loss_avg:0.545252
Epoch:4 Step:2520 Training_loss:0.473756, Acc_avg:72.50% Training_loss_avg:0.544994
Epoch:4 Step:2528 Training_loss:0.935882, Acc_avg:71.50% Training_loss_avg:0.555572
Epoch:4 Step:2536 Training_loss:0.603663, Acc_avg:71.25% Training_loss_avg:0.559425
Epoch:4 Step:2544 Training_loss:0.903989, Acc_avg:70.00% Training_loss_avg:0.569070
Epoch:4 Step:2552 Training_loss:0.631433, Acc_avg:69.75% Training_loss_avg:0.572404
Epoch:4 Step:2560 Training_loss:0.685410, Acc_avg:69.50% Training_loss_avg:0.570534
Epoch:4 Step:2568 Training_loss:0.315708, Acc_avg:70.25% Training_loss_avg:0.563961
Epoch:4 Step:2576 Tr

52it [00:07,  6.60it/s]


Epoch:4 Step:2728 Val_loss:0.607856, Val_Acc_avg:66.00%
Epoch:4 Step:2736 Training_loss:0.583592, Acc_avg:69.75% Training_loss_avg:0.566632
Epoch:4 Step:2744 Training_loss:0.831829, Acc_avg:69.50% Training_loss_avg:0.570145
Epoch:4 Step:2752 Training_loss:0.421502, Acc_avg:69.25% Training_loss_avg:0.571403
Epoch:4 Step:2760 Training_loss:0.699395, Acc_avg:69.50% Training_loss_avg:0.570339
Epoch:4 Step:2768 Training_loss:0.506170, Acc_avg:70.00% Training_loss_avg:0.568305
Epoch:4 Step:2776 Training_loss:0.426973, Acc_avg:69.50% Training_loss_avg:0.569998
Epoch:4 Step:2784 Training_loss:0.293821, Acc_avg:70.00% Training_loss_avg:0.567755
Epoch:4 Step:2792 Training_loss:0.795251, Acc_avg:70.25% Training_loss_avg:0.569779
Epoch:4 Step:2800 Training_loss:0.606037, Acc_avg:69.75% Training_loss_avg:0.570472
Epoch:4 Step:2808 Training_loss:0.929869, Acc_avg:69.25% Training_loss_avg:0.575560
Epoch:4 Step:2816 Training_loss:0.326508, Acc_avg:69.75% Training_loss_avg:0.571380
Epoch:4 Step:2824 Tr

52it [00:07,  6.61it/s]


Epoch:4 Step:2976 Val_loss:0.596453, Val_Acc_avg:67.25%
Epoch:4 Step:2984 Training_loss:0.546762, Acc_avg:72.50% Training_loss_avg:0.546559
Epoch:4 Step:2992 Training_loss:0.468825, Acc_avg:72.00% Training_loss_avg:0.550518
Epoch:4 Step:3000 Training_loss:0.762902, Acc_avg:71.25% Training_loss_avg:0.558095
Epoch:4 Step:3008 Training_loss:0.446309, Acc_avg:71.75% Training_loss_avg:0.554606
Epoch:4 Step:3016 Training_loss:0.682584, Acc_avg:71.25% Training_loss_avg:0.555735
Epoch:4 Step:3024 Training_loss:0.399476, Acc_avg:71.00% Training_loss_avg:0.557200
Epoch:4 Step:3032 Training_loss:0.581398, Acc_avg:71.00% Training_loss_avg:0.556544
Epoch:4 Step:3040 Training_loss:0.401827, Acc_avg:71.00% Training_loss_avg:0.556771
Epoch:4 Step:3048 Training_loss:0.803543, Acc_avg:70.75% Training_loss_avg:0.566597
Epoch:4 Step:3056 Training_loss:0.820118, Acc_avg:69.75% Training_loss_avg:0.575939
Epoch:4 Step:3064 Training_loss:0.607218, Acc_avg:70.25% Training_loss_avg:0.575142
Epoch:4 Step:3072 Tr

52it [00:07,  6.60it/s]


Epoch:4 Step:3224 Val_loss:0.613616, Val_Acc_avg:63.75%
Epoch:4 Step:3232 Training_loss:0.580986, Acc_avg:69.00% Training_loss_avg:0.578269
Epoch:4 Step:3240 Training_loss:0.838321, Acc_avg:68.50% Training_loss_avg:0.582393
Epoch:4 Step:3248 Training_loss:0.584132, Acc_avg:68.50% Training_loss_avg:0.584188
Epoch:4 Step:3256 Training_loss:0.773404, Acc_avg:67.50% Training_loss_avg:0.593249
Epoch:4 Step:3264 Training_loss:0.456495, Acc_avg:67.50% Training_loss_avg:0.595212
Epoch:4 Step:3272 Training_loss:0.729953, Acc_avg:67.00% Training_loss_avg:0.601798
Epoch:4 Step:3280 Training_loss:0.616288, Acc_avg:66.50% Training_loss_avg:0.603514
Epoch:4 Step:3288 Training_loss:0.567872, Acc_avg:66.50% Training_loss_avg:0.602931
Epoch:4 Step:3296 Training_loss:0.597734, Acc_avg:66.25% Training_loss_avg:0.601283
Epoch:4 Step:3304 Training_loss:0.517985, Acc_avg:66.50% Training_loss_avg:0.602033
Epoch:4 Step:3312 Training_loss:0.518588, Acc_avg:66.75% Training_loss_avg:0.597189
Epoch:4 Step:3320 Tr

In [None]:
def main():
    """
    Main configuration function for a given finetune run
    :return: None
    """
    run_name = "lr_5e-6 redo, 512 split with attention!"
    run_dir = "codebert_finetune_runs/{}".format(run_name)
    model_name = 'codebert-base'
    checkpoint_location = None
    online = False
    load_splits = False
    save_data = True

    if load_splits:
      train_data, val_data, test_data = split_loader(run_dir)
      X_train, A_train, Y_train = train_data
      X_val, A_val, Y_val = val_data
      X_test, A_test, Y_test = test_data
      print("loaded data splits")

    else:
      print("generating data splits")

      code_df = preprocess_data(file_loc='code_dataset.jsonl')
      train_data, val_data, test_data = tokenize(code_df, model_name=model_name)

      X_train, A_train, Y_train = train_data
      X_val, A_val, Y_val = val_data
      X_test, A_test, Y_test = test_data

      data_type = ['train', 'val', 'test']
      data_split_type = ['X', 'A', 'Y']

    
    # Creating dir to save logs and checkpoints, re
    dir_name = "{}".format(run_dir)
    if os.path.exists(dir_name):
        input("run name already exists, press Enter to overwrite")
    else:
        os.makedirs(dir_name)


    if save_data:
      print("saving data splits")

      data_all = [train_data, val_data, test_data]
      for i, data in enumerate(data_all):
        for j, split in enumerate(data):
          with open('{}/{}_{}.pickle'.format(run_dir,data_type[i], data_split_type[j]), 'wb') as handle:
            pickle.dump(split, handle)


    print(train_data[0].shape)
    print("Data points: {}".format(len(train_data)))

    # Loading model from checkpoint if location provided
    if online:
        print("loading model from online")
        model = AutoModelForSequenceClassification.from_pretrained("microsoft/codebert-base")
    elif checkpoint_location is None:
        print("loading model from local repo")

        model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)
    else:
        print("loading model from checkpoint: {}".format(checkpoint_location))
        model = AutoModelForSequenceClassification.from_pretrained(checkpoint_location)

    train(model=model,
          train_data=train_data,
          val_data=val_data,
          epochs=5,
          batch_size=8,
          learning_rate=5e-6,
          validate_per=250,
          run_name=run_name,
          run_descrption="lr_5e-6 redo, 512 split, saving splits")


In [78]:
torch.cuda.empty_cache()
main()

generating data splits
Insecure code counts: 3729, Total code counts: 8000, Proportion 0.466125
Data points: 8000




saving data splits
torch.Size([3331, 512])
Data points: 3
loading model from local repo


Some weights of the model checkpoint at codebert-base were not used when initializing RobertaForSequenceClassification: ['pooler.dense.weight', 'pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at codebert-base and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.weight', 'classifier.out_proj.weight', 'classifier.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for p

run name already exists, exiting to prevent overwriting




Epoch:0 Step:0 Training_loss:0.686342, Acc_avg:37.50% Training_loss_avg:0.686342
Validating:


52it [00:07,  6.61it/s]


Epoch:0 Step:0 Val_loss:0.702853, Val_Acc_avg:43.25%
Epoch:0 Step:8 Training_loss:0.686846, Acc_avg:56.25% Training_loss_avg:0.686594
Epoch:0 Step:16 Training_loss:0.732139, Acc_avg:50.00% Training_loss_avg:0.701776
Epoch:0 Step:24 Training_loss:0.702062, Acc_avg:46.88% Training_loss_avg:0.701848
Epoch:0 Step:32 Training_loss:0.624708, Acc_avg:57.50% Training_loss_avg:0.686420
Epoch:0 Step:40 Training_loss:0.693738, Acc_avg:58.33% Training_loss_avg:0.687639
Epoch:0 Step:48 Training_loss:0.719046, Acc_avg:55.36% Training_loss_avg:0.692126
Epoch:0 Step:56 Training_loss:0.701266, Acc_avg:53.12% Training_loss_avg:0.693268
Epoch:0 Step:64 Training_loss:0.699633, Acc_avg:51.39% Training_loss_avg:0.693976
Epoch:0 Step:72 Training_loss:0.716591, Acc_avg:51.25% Training_loss_avg:0.696237
Epoch:0 Step:80 Training_loss:0.683150, Acc_avg:52.27% Training_loss_avg:0.695047
Epoch:0 Step:88 Training_loss:0.710724, Acc_avg:51.04% Training_loss_avg:0.696354
Epoch:0 Step:96 Training_loss:0.720085, Acc_av

52it [00:07,  6.61it/s]


Epoch:0 Step:248 Val_loss:0.687149, Val_Acc_avg:56.00%
Epoch:0 Step:256 Training_loss:0.672887, Acc_avg:55.30% Training_loss_avg:0.686639
Epoch:0 Step:264 Training_loss:0.701342, Acc_avg:55.15% Training_loss_avg:0.687071
Epoch:0 Step:272 Training_loss:0.686413, Acc_avg:55.00% Training_loss_avg:0.687052
Epoch:0 Step:280 Training_loss:0.692700, Acc_avg:54.86% Training_loss_avg:0.687209
Epoch:0 Step:288 Training_loss:0.729866, Acc_avg:54.39% Training_loss_avg:0.688362
Epoch:0 Step:296 Training_loss:0.714574, Acc_avg:53.95% Training_loss_avg:0.689052
Epoch:0 Step:304 Training_loss:0.725544, Acc_avg:53.53% Training_loss_avg:0.689988
Epoch:0 Step:312 Training_loss:0.738837, Acc_avg:53.75% Training_loss_avg:0.691209
Epoch:0 Step:320 Training_loss:0.686281, Acc_avg:53.35% Training_loss_avg:0.691089
Epoch:0 Step:328 Training_loss:0.643361, Acc_avg:53.57% Training_loss_avg:0.689952
Epoch:0 Step:336 Training_loss:0.683200, Acc_avg:53.49% Training_loss_avg:0.689795
Epoch:0 Step:344 Training_loss:0

52it [00:07,  6.61it/s]


Epoch:0 Step:496 Val_loss:0.692361, Val_Acc_avg:51.00%
Epoch:0 Step:504 Training_loss:0.693638, Acc_avg:53.50% Training_loss_avg:0.689799
Epoch:0 Step:512 Training_loss:0.703834, Acc_avg:53.25% Training_loss_avg:0.690631
Epoch:0 Step:520 Training_loss:0.666999, Acc_avg:53.25% Training_loss_avg:0.690631
Epoch:0 Step:528 Training_loss:0.625619, Acc_avg:53.75% Training_loss_avg:0.689639
Epoch:0 Step:536 Training_loss:0.700659, Acc_avg:53.50% Training_loss_avg:0.690862
Epoch:0 Step:544 Training_loss:0.660008, Acc_avg:54.00% Training_loss_avg:0.689446
Epoch:0 Step:552 Training_loss:0.739076, Acc_avg:54.00% Training_loss_avg:0.690673
Epoch:0 Step:560 Training_loss:0.730489, Acc_avg:53.25% Training_loss_avg:0.691388
Epoch:0 Step:568 Training_loss:0.733346, Acc_avg:52.25% Training_loss_avg:0.692773
Epoch:0 Step:576 Training_loss:0.685395, Acc_avg:52.25% Training_loss_avg:0.693362
Epoch:0 Step:584 Training_loss:0.678999, Acc_avg:53.00% Training_loss_avg:0.692442
Epoch:0 Step:592 Training_loss:0

52it [00:07,  6.61it/s]


Epoch:0 Step:744 Val_loss:0.682478, Val_Acc_avg:56.75%
Epoch:0 Step:752 Training_loss:0.579665, Acc_avg:52.25% Training_loss_avg:0.693574
Epoch:0 Step:760 Training_loss:0.587760, Acc_avg:53.00% Training_loss_avg:0.691147
Epoch:0 Step:768 Training_loss:0.599693, Acc_avg:53.25% Training_loss_avg:0.689835
Epoch:0 Step:776 Training_loss:0.664791, Acc_avg:52.75% Training_loss_avg:0.690504
Epoch:0 Step:784 Training_loss:0.777641, Acc_avg:51.50% Training_loss_avg:0.693006
Epoch:0 Step:792 Training_loss:0.687991, Acc_avg:51.75% Training_loss_avg:0.692011
Epoch:0 Step:800 Training_loss:0.783083, Acc_avg:51.25% Training_loss_avg:0.693019
Epoch:0 Step:808 Training_loss:0.681606, Acc_avg:51.75% Training_loss_avg:0.691762
Epoch:0 Step:816 Training_loss:0.740076, Acc_avg:52.00% Training_loss_avg:0.691670
Epoch:0 Step:824 Training_loss:0.658406, Acc_avg:52.50% Training_loss_avg:0.690773
Epoch:0 Step:832 Training_loss:0.657484, Acc_avg:53.00% Training_loss_avg:0.689829
Epoch:0 Step:840 Training_loss:0

52it [00:07,  6.61it/s]


Epoch:0 Step:992 Val_loss:0.684027, Val_Acc_avg:57.50%
Epoch:0 Step:1000 Training_loss:0.742666, Acc_avg:53.50% Training_loss_avg:0.689394
Epoch:0 Step:1008 Training_loss:0.695131, Acc_avg:53.75% Training_loss_avg:0.688364
Epoch:0 Step:1016 Training_loss:0.726107, Acc_avg:53.75% Training_loss_avg:0.688647
Epoch:0 Step:1024 Training_loss:0.694181, Acc_avg:53.50% Training_loss_avg:0.689742
Epoch:0 Step:1032 Training_loss:0.664683, Acc_avg:54.25% Training_loss_avg:0.688955
Epoch:0 Step:1040 Training_loss:0.723574, Acc_avg:54.00% Training_loss_avg:0.689424
Epoch:0 Step:1048 Training_loss:0.659806, Acc_avg:53.75% Training_loss_avg:0.690501
Epoch:0 Step:1056 Training_loss:0.641882, Acc_avg:54.00% Training_loss_avg:0.689583
Epoch:0 Step:1064 Training_loss:0.703656, Acc_avg:53.25% Training_loss_avg:0.691470
Epoch:0 Step:1072 Training_loss:0.663331, Acc_avg:53.50% Training_loss_avg:0.689827
Epoch:0 Step:1080 Training_loss:0.668947, Acc_avg:54.00% Training_loss_avg:0.689242
Epoch:0 Step:1088 Tra

52it [00:07,  6.61it/s]


Epoch:0 Step:1240 Val_loss:0.684373, Val_Acc_avg:56.75%
Epoch:0 Step:1248 Training_loss:0.695780, Acc_avg:56.00% Training_loss_avg:0.688483
Epoch:0 Step:1256 Training_loss:0.647460, Acc_avg:56.50% Training_loss_avg:0.687130
Epoch:0 Step:1264 Training_loss:0.639407, Acc_avg:57.00% Training_loss_avg:0.685445
Epoch:0 Step:1272 Training_loss:0.650423, Acc_avg:57.25% Training_loss_avg:0.684982
Epoch:0 Step:1280 Training_loss:0.655698, Acc_avg:57.50% Training_loss_avg:0.684264
Epoch:0 Step:1288 Training_loss:0.654152, Acc_avg:57.25% Training_loss_avg:0.683495
Epoch:0 Step:1296 Training_loss:0.675169, Acc_avg:57.00% Training_loss_avg:0.683822
Epoch:0 Step:1304 Training_loss:0.702343, Acc_avg:57.75% Training_loss_avg:0.682204
Epoch:0 Step:1312 Training_loss:0.656873, Acc_avg:58.25% Training_loss_avg:0.680892
Epoch:0 Step:1320 Training_loss:0.637846, Acc_avg:58.25% Training_loss_avg:0.679872
Epoch:0 Step:1328 Training_loss:0.655496, Acc_avg:59.00% Training_loss_avg:0.678738
Epoch:0 Step:1336 Tr

52it [00:07,  6.61it/s]


Epoch:0 Step:1488 Val_loss:0.683190, Val_Acc_avg:56.75%
Epoch:0 Step:1496 Training_loss:0.679065, Acc_avg:56.50% Training_loss_avg:0.687481
Epoch:0 Step:1504 Training_loss:0.669561, Acc_avg:57.00% Training_loss_avg:0.686365
Epoch:0 Step:1512 Training_loss:0.634266, Acc_avg:57.25% Training_loss_avg:0.685643
Epoch:0 Step:1520 Training_loss:0.685757, Acc_avg:56.75% Training_loss_avg:0.686179
Epoch:0 Step:1528 Training_loss:0.676810, Acc_avg:56.75% Training_loss_avg:0.686580
Epoch:0 Step:1536 Training_loss:0.591102, Acc_avg:57.75% Training_loss_avg:0.682757
Epoch:0 Step:1544 Training_loss:0.610615, Acc_avg:58.00% Training_loss_avg:0.680983
Epoch:0 Step:1552 Training_loss:0.662957, Acc_avg:58.50% Training_loss_avg:0.680297
Epoch:0 Step:1560 Training_loss:0.758877, Acc_avg:57.75% Training_loss_avg:0.681757
Epoch:0 Step:1568 Training_loss:0.717519, Acc_avg:58.00% Training_loss_avg:0.680893
Epoch:0 Step:1576 Training_loss:0.694922, Acc_avg:58.00% Training_loss_avg:0.680523
Epoch:0 Step:1584 Tr

52it [00:07,  6.61it/s]


Epoch:0 Step:1736 Val_loss:0.678570, Val_Acc_avg:58.00%
Epoch:0 Step:1744 Training_loss:0.602189, Acc_avg:56.50% Training_loss_avg:0.689232
Epoch:0 Step:1752 Training_loss:0.636324, Acc_avg:56.75% Training_loss_avg:0.688986
Epoch:0 Step:1760 Training_loss:0.625652, Acc_avg:57.00% Training_loss_avg:0.688578
Epoch:0 Step:1768 Training_loss:0.645852, Acc_avg:57.00% Training_loss_avg:0.688590
Epoch:0 Step:1776 Training_loss:0.675028, Acc_avg:56.75% Training_loss_avg:0.687510
Epoch:0 Step:1784 Training_loss:0.734251, Acc_avg:55.75% Training_loss_avg:0.690228
Epoch:0 Step:1792 Training_loss:0.598457, Acc_avg:56.25% Training_loss_avg:0.688699
Epoch:0 Step:1800 Training_loss:0.734482, Acc_avg:56.50% Training_loss_avg:0.689394
Epoch:0 Step:1808 Training_loss:0.654426, Acc_avg:57.00% Training_loss_avg:0.686430
Epoch:0 Step:1816 Training_loss:0.636995, Acc_avg:56.75% Training_loss_avg:0.686520
Epoch:0 Step:1824 Training_loss:0.746303, Acc_avg:57.50% Training_loss_avg:0.684032
Epoch:0 Step:1832 Tr

52it [00:07,  6.61it/s]


Epoch:0 Step:1984 Val_loss:0.679165, Val_Acc_avg:57.00%
Epoch:0 Step:1992 Training_loss:0.802993, Acc_avg:55.00% Training_loss_avg:0.691668
Epoch:0 Step:2000 Training_loss:0.638228, Acc_avg:55.25% Training_loss_avg:0.690194
Epoch:0 Step:2008 Training_loss:0.664689, Acc_avg:55.50% Training_loss_avg:0.689088
Epoch:0 Step:2016 Training_loss:0.671690, Acc_avg:55.25% Training_loss_avg:0.689801
Epoch:0 Step:2024 Training_loss:0.685809, Acc_avg:55.25% Training_loss_avg:0.689386
Epoch:0 Step:2032 Training_loss:0.707991, Acc_avg:55.50% Training_loss_avg:0.689283
Epoch:0 Step:2040 Training_loss:0.665923, Acc_avg:56.25% Training_loss_avg:0.687883
Epoch:0 Step:2048 Training_loss:0.619772, Acc_avg:57.00% Training_loss_avg:0.686677
Epoch:0 Step:2056 Training_loss:0.647221, Acc_avg:57.25% Training_loss_avg:0.685676
Epoch:0 Step:2064 Training_loss:0.724065, Acc_avg:57.50% Training_loss_avg:0.684887
Epoch:0 Step:2072 Training_loss:0.707461, Acc_avg:57.00% Training_loss_avg:0.686946
Epoch:0 Step:2080 Tr

52it [00:07,  6.61it/s]


Epoch:0 Step:2232 Val_loss:0.680527, Val_Acc_avg:58.75%
Epoch:0 Step:2240 Training_loss:0.640884, Acc_avg:53.75% Training_loss_avg:0.692013
Epoch:0 Step:2248 Training_loss:0.649412, Acc_avg:53.75% Training_loss_avg:0.693864
Epoch:0 Step:2256 Training_loss:0.668064, Acc_avg:54.00% Training_loss_avg:0.692685
Epoch:0 Step:2264 Training_loss:0.675619, Acc_avg:54.50% Training_loss_avg:0.690611
Epoch:0 Step:2272 Training_loss:0.688091, Acc_avg:54.50% Training_loss_avg:0.690006
Epoch:0 Step:2280 Training_loss:0.669856, Acc_avg:54.50% Training_loss_avg:0.688777
Epoch:0 Step:2288 Training_loss:0.696181, Acc_avg:54.25% Training_loss_avg:0.690997
Epoch:0 Step:2296 Training_loss:0.737689, Acc_avg:54.00% Training_loss_avg:0.692600
Epoch:0 Step:2304 Training_loss:0.723578, Acc_avg:53.50% Training_loss_avg:0.693800
Epoch:0 Step:2312 Training_loss:0.706991, Acc_avg:53.75% Training_loss_avg:0.693996
Epoch:0 Step:2320 Training_loss:0.720902, Acc_avg:53.75% Training_loss_avg:0.692094
Epoch:0 Step:2328 Tr

52it [00:07,  6.60it/s]


Epoch:0 Step:2480 Val_loss:0.681564, Val_Acc_avg:57.75%
Epoch:0 Step:2488 Training_loss:0.712447, Acc_avg:51.00% Training_loss_avg:0.693283
Epoch:0 Step:2496 Training_loss:0.676482, Acc_avg:51.00% Training_loss_avg:0.693162
Epoch:0 Step:2504 Training_loss:0.667493, Acc_avg:50.75% Training_loss_avg:0.693286
Epoch:0 Step:2512 Training_loss:0.666137, Acc_avg:51.25% Training_loss_avg:0.689411
Epoch:0 Step:2520 Training_loss:0.720680, Acc_avg:51.25% Training_loss_avg:0.690203
Epoch:0 Step:2528 Training_loss:0.730893, Acc_avg:50.50% Training_loss_avg:0.690967
Epoch:0 Step:2536 Training_loss:0.671071, Acc_avg:50.75% Training_loss_avg:0.691399
Epoch:0 Step:2544 Training_loss:0.633031, Acc_avg:51.00% Training_loss_avg:0.690445
Epoch:0 Step:2552 Training_loss:0.707764, Acc_avg:51.25% Training_loss_avg:0.690950
Epoch:0 Step:2560 Training_loss:0.693178, Acc_avg:51.00% Training_loss_avg:0.691787
Epoch:0 Step:2568 Training_loss:0.700455, Acc_avg:51.25% Training_loss_avg:0.691542
Epoch:0 Step:2576 Tr

52it [00:07,  6.61it/s]


Epoch:0 Step:2728 Val_loss:0.684117, Val_Acc_avg:56.50%
Epoch:0 Step:2736 Training_loss:0.668095, Acc_avg:52.00% Training_loss_avg:0.691500
Epoch:0 Step:2744 Training_loss:0.697412, Acc_avg:51.75% Training_loss_avg:0.692073
Epoch:0 Step:2752 Training_loss:0.715872, Acc_avg:52.00% Training_loss_avg:0.693158
Epoch:0 Step:2760 Training_loss:0.688613, Acc_avg:52.50% Training_loss_avg:0.692233
Epoch:0 Step:2768 Training_loss:0.633020, Acc_avg:52.75% Training_loss_avg:0.690763
Epoch:0 Step:2776 Training_loss:0.742272, Acc_avg:52.25% Training_loss_avg:0.691109
Epoch:0 Step:2784 Training_loss:0.678095, Acc_avg:52.25% Training_loss_avg:0.690707
Epoch:0 Step:2792 Training_loss:0.681414, Acc_avg:52.25% Training_loss_avg:0.690299
Epoch:0 Step:2800 Training_loss:0.688416, Acc_avg:52.25% Training_loss_avg:0.689758
Epoch:0 Step:2808 Training_loss:0.731677, Acc_avg:52.00% Training_loss_avg:0.691257
Epoch:0 Step:2816 Training_loss:0.731145, Acc_avg:51.25% Training_loss_avg:0.693267
Epoch:0 Step:2824 Tr

52it [00:07,  6.61it/s]


Epoch:0 Step:2976 Val_loss:0.682047, Val_Acc_avg:57.50%
Epoch:0 Step:2984 Training_loss:0.623523, Acc_avg:53.50% Training_loss_avg:0.683387
Epoch:0 Step:2992 Training_loss:0.650054, Acc_avg:53.25% Training_loss_avg:0.684899
Epoch:0 Step:3000 Training_loss:0.724778, Acc_avg:53.25% Training_loss_avg:0.686101
Epoch:0 Step:3008 Training_loss:0.750019, Acc_avg:53.75% Training_loss_avg:0.685284
Epoch:0 Step:3016 Training_loss:0.990269, Acc_avg:53.25% Training_loss_avg:0.691089
Epoch:0 Step:3024 Training_loss:0.615470, Acc_avg:54.00% Training_loss_avg:0.687829
Epoch:0 Step:3032 Training_loss:0.620715, Acc_avg:54.00% Training_loss_avg:0.686206
Epoch:0 Step:3040 Training_loss:0.632329, Acc_avg:54.25% Training_loss_avg:0.685739
Epoch:0 Step:3048 Training_loss:0.679889, Acc_avg:54.75% Training_loss_avg:0.684617
Epoch:0 Step:3056 Training_loss:0.889357, Acc_avg:53.75% Training_loss_avg:0.689124
Epoch:0 Step:3064 Training_loss:0.591778, Acc_avg:54.25% Training_loss_avg:0.686164
Epoch:0 Step:3072 Tr

52it [00:07,  6.61it/s]


Epoch:0 Step:3224 Val_loss:0.679808, Val_Acc_avg:57.75%
Epoch:0 Step:3232 Training_loss:0.671561, Acc_avg:56.25% Training_loss_avg:0.690364
Epoch:0 Step:3240 Training_loss:0.728871, Acc_avg:56.00% Training_loss_avg:0.691049
Epoch:0 Step:3248 Training_loss:0.691867, Acc_avg:56.25% Training_loss_avg:0.690731
Epoch:0 Step:3256 Training_loss:0.692300, Acc_avg:56.25% Training_loss_avg:0.690939
Epoch:0 Step:3264 Training_loss:0.674849, Acc_avg:56.00% Training_loss_avg:0.690893
Epoch:0 Step:3272 Training_loss:0.712065, Acc_avg:55.75% Training_loss_avg:0.691740
Epoch:0 Step:3280 Training_loss:0.711570, Acc_avg:56.25% Training_loss_avg:0.691994
Epoch:0 Step:3288 Training_loss:0.715425, Acc_avg:56.25% Training_loss_avg:0.692536
Epoch:0 Step:3296 Training_loss:0.675201, Acc_avg:56.50% Training_loss_avg:0.692953
Epoch:0 Step:3304 Training_loss:0.713094, Acc_avg:56.25% Training_loss_avg:0.693387
Epoch:0 Step:3312 Training_loss:0.702504, Acc_avg:57.00% Training_loss_avg:0.692744
Epoch:0 Step:3320 Tr

52it [00:07,  6.60it/s]


Epoch:1 Step:0 Val_loss:0.679753, Val_Acc_avg:58.00%
Epoch:1 Step:8 Training_loss:0.669820, Acc_avg:57.33% Training_loss_avg:0.695062
Epoch:1 Step:16 Training_loss:0.695409, Acc_avg:56.58% Training_loss_avg:0.696372
Epoch:1 Step:24 Training_loss:0.716063, Acc_avg:55.83% Training_loss_avg:0.696534
Epoch:1 Step:32 Training_loss:0.658311, Acc_avg:56.33% Training_loss_avg:0.695748
Epoch:1 Step:40 Training_loss:0.708178, Acc_avg:55.58% Training_loss_avg:0.699893
Epoch:1 Step:48 Training_loss:0.668886, Acc_avg:55.58% Training_loss_avg:0.700800
Epoch:1 Step:56 Training_loss:0.706617, Acc_avg:55.08% Training_loss_avg:0.701931
Epoch:1 Step:64 Training_loss:0.725719, Acc_avg:54.83% Training_loss_avg:0.701950
Epoch:1 Step:72 Training_loss:0.696446, Acc_avg:54.58% Training_loss_avg:0.700879
Epoch:1 Step:80 Training_loss:0.646704, Acc_avg:55.33% Training_loss_avg:0.694007
Epoch:1 Step:88 Training_loss:0.653356, Acc_avg:55.33% Training_loss_avg:0.694765
Epoch:1 Step:96 Training_loss:0.734315, Acc_av

52it [00:07,  6.60it/s]


Epoch:1 Step:248 Val_loss:0.677581, Val_Acc_avg:61.75%
Epoch:1 Step:256 Training_loss:0.673558, Acc_avg:56.08% Training_loss_avg:0.685798
Epoch:1 Step:264 Training_loss:0.685789, Acc_avg:55.83% Training_loss_avg:0.686644
Epoch:1 Step:272 Training_loss:0.706111, Acc_avg:55.83% Training_loss_avg:0.687010
Epoch:1 Step:280 Training_loss:0.704994, Acc_avg:55.58% Training_loss_avg:0.686725
Epoch:1 Step:288 Training_loss:0.705012, Acc_avg:54.58% Training_loss_avg:0.689236
Epoch:1 Step:296 Training_loss:0.667170, Acc_avg:54.58% Training_loss_avg:0.689148
Epoch:1 Step:304 Training_loss:0.693152, Acc_avg:55.08% Training_loss_avg:0.688433
Epoch:1 Step:312 Training_loss:0.687792, Acc_avg:55.08% Training_loss_avg:0.688352
Epoch:1 Step:320 Training_loss:0.665653, Acc_avg:55.08% Training_loss_avg:0.687819
Epoch:1 Step:328 Training_loss:0.687806, Acc_avg:55.08% Training_loss_avg:0.688078
Epoch:1 Step:336 Training_loss:0.654936, Acc_avg:55.33% Training_loss_avg:0.686936
Epoch:1 Step:344 Training_loss:0

52it [00:07,  6.61it/s]


Epoch:1 Step:496 Val_loss:0.678151, Val_Acc_avg:58.00%
Epoch:1 Step:504 Training_loss:0.496980, Acc_avg:57.25% Training_loss_avg:0.679387
Epoch:1 Step:512 Training_loss:0.684079, Acc_avg:57.25% Training_loss_avg:0.679046
Epoch:1 Step:520 Training_loss:0.668672, Acc_avg:57.00% Training_loss_avg:0.679088
Epoch:1 Step:528 Training_loss:0.644888, Acc_avg:57.00% Training_loss_avg:0.678454
Epoch:1 Step:536 Training_loss:0.743718, Acc_avg:56.50% Training_loss_avg:0.679950
Epoch:1 Step:544 Training_loss:0.704625, Acc_avg:56.50% Training_loss_avg:0.681151
Epoch:1 Step:552 Training_loss:0.636903, Acc_avg:57.25% Training_loss_avg:0.680161
Epoch:1 Step:560 Training_loss:0.638226, Acc_avg:57.25% Training_loss_avg:0.679383
Epoch:1 Step:568 Training_loss:0.656710, Acc_avg:56.75% Training_loss_avg:0.679594
Epoch:1 Step:576 Training_loss:0.740176, Acc_avg:56.50% Training_loss_avg:0.681175
Epoch:1 Step:584 Training_loss:0.784135, Acc_avg:56.50% Training_loss_avg:0.682312
Epoch:1 Step:592 Training_loss:0

52it [00:07,  6.60it/s]


Epoch:1 Step:744 Val_loss:0.672608, Val_Acc_avg:59.00%
Epoch:1 Step:752 Training_loss:0.694678, Acc_avg:55.75% Training_loss_avg:0.684052
Epoch:1 Step:760 Training_loss:0.657191, Acc_avg:55.75% Training_loss_avg:0.683037
Epoch:1 Step:768 Training_loss:0.648368, Acc_avg:56.25% Training_loss_avg:0.682484
Epoch:1 Step:776 Training_loss:0.627514, Acc_avg:55.75% Training_loss_avg:0.682888
Epoch:1 Step:784 Training_loss:0.664413, Acc_avg:56.25% Training_loss_avg:0.681458
Epoch:1 Step:792 Training_loss:0.670897, Acc_avg:56.25% Training_loss_avg:0.681275
Epoch:1 Step:800 Training_loss:0.633956, Acc_avg:57.50% Training_loss_avg:0.678243
Epoch:1 Step:808 Training_loss:0.723413, Acc_avg:57.00% Training_loss_avg:0.679524
Epoch:1 Step:816 Training_loss:0.704214, Acc_avg:56.25% Training_loss_avg:0.681682
Epoch:1 Step:824 Training_loss:0.735647, Acc_avg:55.50% Training_loss_avg:0.683939
Epoch:1 Step:832 Training_loss:0.673160, Acc_avg:55.75% Training_loss_avg:0.682570
Epoch:1 Step:840 Training_loss:0

52it [00:07,  6.61it/s]


Epoch:1 Step:992 Val_loss:0.664347, Val_Acc_avg:60.75%
Epoch:1 Step:1000 Training_loss:0.616073, Acc_avg:56.00% Training_loss_avg:0.676375
Epoch:1 Step:1008 Training_loss:0.683208, Acc_avg:56.25% Training_loss_avg:0.675007
Epoch:1 Step:1016 Training_loss:0.793628, Acc_avg:56.00% Training_loss_avg:0.675192
Epoch:1 Step:1024 Training_loss:0.679220, Acc_avg:56.25% Training_loss_avg:0.675668
Epoch:1 Step:1032 Training_loss:0.616566, Acc_avg:56.75% Training_loss_avg:0.673664
Epoch:1 Step:1040 Training_loss:0.654882, Acc_avg:56.75% Training_loss_avg:0.673383
Epoch:1 Step:1048 Training_loss:0.635276, Acc_avg:57.25% Training_loss_avg:0.671640
Epoch:1 Step:1056 Training_loss:0.657767, Acc_avg:57.25% Training_loss_avg:0.671625
Epoch:1 Step:1064 Training_loss:0.578913, Acc_avg:57.75% Training_loss_avg:0.670301
Epoch:1 Step:1072 Training_loss:0.604612, Acc_avg:58.00% Training_loss_avg:0.668742
Epoch:1 Step:1080 Training_loss:0.647322, Acc_avg:58.00% Training_loss_avg:0.667856
Epoch:1 Step:1088 Tra

52it [00:07,  6.62it/s]


Epoch:1 Step:1240 Val_loss:0.664413, Val_Acc_avg:60.00%
Epoch:1 Step:1248 Training_loss:0.610494, Acc_avg:56.00% Training_loss_avg:0.671974
Epoch:1 Step:1256 Training_loss:0.666923, Acc_avg:55.50% Training_loss_avg:0.672632
Epoch:1 Step:1264 Training_loss:0.660704, Acc_avg:56.00% Training_loss_avg:0.671351
Epoch:1 Step:1272 Training_loss:0.606503, Acc_avg:56.00% Training_loss_avg:0.670271
Epoch:1 Step:1280 Training_loss:0.646092, Acc_avg:56.25% Training_loss_avg:0.670356
Epoch:1 Step:1288 Training_loss:0.619928, Acc_avg:56.50% Training_loss_avg:0.671187
Epoch:1 Step:1296 Training_loss:0.613918, Acc_avg:57.00% Training_loss_avg:0.668355
Epoch:1 Step:1304 Training_loss:0.612123, Acc_avg:57.75% Training_loss_avg:0.666940
Epoch:1 Step:1312 Training_loss:0.682934, Acc_avg:57.75% Training_loss_avg:0.667254
Epoch:1 Step:1320 Training_loss:0.682737, Acc_avg:57.50% Training_loss_avg:0.667610
Epoch:1 Step:1328 Training_loss:0.660189, Acc_avg:57.75% Training_loss_avg:0.666868
Epoch:1 Step:1336 Tr

52it [00:07,  6.62it/s]


Epoch:1 Step:1488 Val_loss:0.655498, Val_Acc_avg:61.50%
Epoch:1 Step:1496 Training_loss:0.570956, Acc_avg:58.25% Training_loss_avg:0.661850
Epoch:1 Step:1504 Training_loss:0.721067, Acc_avg:57.75% Training_loss_avg:0.663110
Epoch:1 Step:1512 Training_loss:0.649511, Acc_avg:58.00% Training_loss_avg:0.661322
Epoch:1 Step:1520 Training_loss:0.586341, Acc_avg:58.00% Training_loss_avg:0.660054
Epoch:1 Step:1528 Training_loss:0.647555, Acc_avg:58.00% Training_loss_avg:0.660453
Epoch:1 Step:1536 Training_loss:0.757852, Acc_avg:57.50% Training_loss_avg:0.663337
Epoch:1 Step:1544 Training_loss:0.708326, Acc_avg:57.00% Training_loss_avg:0.663771
Epoch:1 Step:1552 Training_loss:0.649533, Acc_avg:57.75% Training_loss_avg:0.660647
Epoch:1 Step:1560 Training_loss:0.623811, Acc_avg:57.50% Training_loss_avg:0.660364
Epoch:1 Step:1568 Training_loss:0.624606, Acc_avg:57.50% Training_loss_avg:0.660038
Epoch:1 Step:1576 Training_loss:0.653515, Acc_avg:58.25% Training_loss_avg:0.657867
Epoch:1 Step:1584 Tr

52it [00:07,  6.62it/s]


Epoch:1 Step:1736 Val_loss:0.651305, Val_Acc_avg:61.25%
Epoch:1 Step:1744 Training_loss:0.579205, Acc_avg:57.25% Training_loss_avg:0.666375
Epoch:1 Step:1752 Training_loss:0.638024, Acc_avg:57.75% Training_loss_avg:0.665327
Epoch:1 Step:1760 Training_loss:0.657622, Acc_avg:57.50% Training_loss_avg:0.665350
Epoch:1 Step:1768 Training_loss:0.507856, Acc_avg:57.75% Training_loss_avg:0.663164
Epoch:1 Step:1776 Training_loss:0.600903, Acc_avg:57.75% Training_loss_avg:0.661434
Epoch:1 Step:1784 Training_loss:0.660248, Acc_avg:57.50% Training_loss_avg:0.660998
Epoch:1 Step:1792 Training_loss:0.545064, Acc_avg:57.75% Training_loss_avg:0.658760
Epoch:1 Step:1800 Training_loss:0.632619, Acc_avg:56.50% Training_loss_avg:0.660621
Epoch:1 Step:1808 Training_loss:0.688923, Acc_avg:56.75% Training_loss_avg:0.659263
Epoch:1 Step:1816 Training_loss:0.595956, Acc_avg:57.00% Training_loss_avg:0.658484
Epoch:1 Step:1824 Training_loss:0.701818, Acc_avg:57.25% Training_loss_avg:0.660288
Epoch:1 Step:1832 Tr

52it [00:07,  6.62it/s]


Epoch:1 Step:1984 Val_loss:0.653130, Val_Acc_avg:59.75%
Epoch:1 Step:1992 Training_loss:0.722294, Acc_avg:55.25% Training_loss_avg:0.669243
Epoch:1 Step:2000 Training_loss:0.706214, Acc_avg:54.75% Training_loss_avg:0.669157
Epoch:1 Step:2008 Training_loss:0.815434, Acc_avg:54.25% Training_loss_avg:0.671418
Epoch:1 Step:2016 Training_loss:0.612784, Acc_avg:55.25% Training_loss_avg:0.668999
Epoch:1 Step:2024 Training_loss:0.747577, Acc_avg:55.00% Training_loss_avg:0.670589
Epoch:1 Step:2032 Training_loss:0.645155, Acc_avg:56.00% Training_loss_avg:0.667378
Epoch:1 Step:2040 Training_loss:0.652728, Acc_avg:56.25% Training_loss_avg:0.666796
Epoch:1 Step:2048 Training_loss:0.629099, Acc_avg:56.00% Training_loss_avg:0.666813
Epoch:1 Step:2056 Training_loss:0.613379, Acc_avg:56.25% Training_loss_avg:0.667229
Epoch:1 Step:2064 Training_loss:0.701983, Acc_avg:56.25% Training_loss_avg:0.666280
Epoch:1 Step:2072 Training_loss:0.642541, Acc_avg:56.25% Training_loss_avg:0.664298
Epoch:1 Step:2080 Tr

52it [00:07,  6.62it/s]


Epoch:1 Step:2232 Val_loss:0.643686, Val_Acc_avg:63.00%
Epoch:1 Step:2240 Training_loss:0.677246, Acc_avg:55.00% Training_loss_avg:0.679383
Epoch:1 Step:2248 Training_loss:0.580222, Acc_avg:55.75% Training_loss_avg:0.675489
Epoch:1 Step:2256 Training_loss:0.656823, Acc_avg:55.75% Training_loss_avg:0.673842
Epoch:1 Step:2264 Training_loss:0.721698, Acc_avg:56.00% Training_loss_avg:0.672717
Epoch:1 Step:2272 Training_loss:0.672242, Acc_avg:56.50% Training_loss_avg:0.672945
Epoch:1 Step:2280 Training_loss:0.723898, Acc_avg:56.00% Training_loss_avg:0.676201
Epoch:1 Step:2288 Training_loss:0.658577, Acc_avg:56.50% Training_loss_avg:0.676446
Epoch:1 Step:2296 Training_loss:0.644219, Acc_avg:56.50% Training_loss_avg:0.677237
Epoch:1 Step:2304 Training_loss:0.702596, Acc_avg:57.00% Training_loss_avg:0.676219
Epoch:1 Step:2312 Training_loss:0.601971, Acc_avg:58.00% Training_loss_avg:0.674432
Epoch:1 Step:2320 Training_loss:0.725967, Acc_avg:57.50% Training_loss_avg:0.675799
Epoch:1 Step:2328 Tr

52it [00:07,  6.61it/s]


Epoch:1 Step:2480 Val_loss:0.647094, Val_Acc_avg:62.00%
Epoch:1 Step:2488 Training_loss:0.685628, Acc_avg:60.00% Training_loss_avg:0.665983
Epoch:1 Step:2496 Training_loss:0.612000, Acc_avg:60.00% Training_loss_avg:0.665468
Epoch:1 Step:2504 Training_loss:0.697183, Acc_avg:59.75% Training_loss_avg:0.666588
Epoch:1 Step:2512 Training_loss:0.694877, Acc_avg:59.75% Training_loss_avg:0.666198
Epoch:1 Step:2520 Training_loss:0.667238, Acc_avg:59.75% Training_loss_avg:0.664440
Epoch:1 Step:2528 Training_loss:0.766898, Acc_avg:59.00% Training_loss_avg:0.668213
Epoch:1 Step:2536 Training_loss:0.608109, Acc_avg:59.25% Training_loss_avg:0.668407
Epoch:1 Step:2544 Training_loss:0.674039, Acc_avg:59.75% Training_loss_avg:0.666987
Epoch:1 Step:2552 Training_loss:0.670451, Acc_avg:59.50% Training_loss_avg:0.669249
Epoch:1 Step:2560 Training_loss:0.646797, Acc_avg:59.50% Training_loss_avg:0.669735
Epoch:1 Step:2568 Training_loss:0.786424, Acc_avg:59.75% Training_loss_avg:0.669056
Epoch:1 Step:2576 Tr

52it [00:07,  6.62it/s]


Epoch:1 Step:2728 Val_loss:0.660897, Val_Acc_avg:61.25%
Epoch:1 Step:2736 Training_loss:0.817991, Acc_avg:61.50% Training_loss_avg:0.666179
Epoch:1 Step:2744 Training_loss:0.761004, Acc_avg:61.25% Training_loss_avg:0.667401
Epoch:1 Step:2752 Training_loss:0.694688, Acc_avg:61.25% Training_loss_avg:0.666728
Epoch:1 Step:2760 Training_loss:0.560770, Acc_avg:61.50% Training_loss_avg:0.665544
Epoch:1 Step:2768 Training_loss:0.666823, Acc_avg:61.75% Training_loss_avg:0.666001
Epoch:1 Step:2776 Training_loss:0.829889, Acc_avg:60.75% Training_loss_avg:0.669446
Epoch:1 Step:2784 Training_loss:0.540583, Acc_avg:61.75% Training_loss_avg:0.667288
Epoch:1 Step:2792 Training_loss:0.559638, Acc_avg:62.25% Training_loss_avg:0.665001
Epoch:1 Step:2800 Training_loss:0.598874, Acc_avg:62.50% Training_loss_avg:0.665598
Epoch:1 Step:2808 Training_loss:0.821802, Acc_avg:61.50% Training_loss_avg:0.669913
Epoch:1 Step:2816 Training_loss:0.705248, Acc_avg:61.25% Training_loss_avg:0.670258
Epoch:1 Step:2824 Tr

52it [00:07,  6.62it/s]


Epoch:1 Step:2976 Val_loss:0.647148, Val_Acc_avg:61.25%
Epoch:1 Step:2984 Training_loss:0.877431, Acc_avg:62.75% Training_loss_avg:0.655434
Epoch:1 Step:2992 Training_loss:0.872519, Acc_avg:61.50% Training_loss_avg:0.662635
Epoch:1 Step:3000 Training_loss:0.814545, Acc_avg:60.75% Training_loss_avg:0.665624
Epoch:1 Step:3008 Training_loss:0.624640, Acc_avg:60.50% Training_loss_avg:0.664686
Epoch:1 Step:3016 Training_loss:0.694160, Acc_avg:60.50% Training_loss_avg:0.666181
Epoch:1 Step:3024 Training_loss:0.713705, Acc_avg:60.00% Training_loss_avg:0.669397
Epoch:1 Step:3032 Training_loss:0.641880, Acc_avg:60.25% Training_loss_avg:0.670749
Epoch:1 Step:3040 Training_loss:0.638924, Acc_avg:60.25% Training_loss_avg:0.672003
Epoch:1 Step:3048 Training_loss:0.669178, Acc_avg:60.25% Training_loss_avg:0.671461
Epoch:1 Step:3056 Training_loss:0.731123, Acc_avg:59.50% Training_loss_avg:0.672771
Epoch:1 Step:3064 Training_loss:0.696321, Acc_avg:59.25% Training_loss_avg:0.675727
Epoch:1 Step:3072 Tr

52it [00:07,  6.62it/s]


Epoch:1 Step:3224 Val_loss:0.643625, Val_Acc_avg:61.50%
Epoch:1 Step:3232 Training_loss:0.572384, Acc_avg:59.00% Training_loss_avg:0.663863
Epoch:1 Step:3240 Training_loss:0.518759, Acc_avg:59.50% Training_loss_avg:0.660481
Epoch:1 Step:3248 Training_loss:0.572070, Acc_avg:60.00% Training_loss_avg:0.659204
Epoch:1 Step:3256 Training_loss:0.613231, Acc_avg:59.75% Training_loss_avg:0.659540
Epoch:1 Step:3264 Training_loss:0.565156, Acc_avg:60.25% Training_loss_avg:0.656939
Epoch:1 Step:3272 Training_loss:0.738179, Acc_avg:59.25% Training_loss_avg:0.662627
Epoch:1 Step:3280 Training_loss:0.716741, Acc_avg:59.25% Training_loss_avg:0.663723
Epoch:1 Step:3288 Training_loss:0.679564, Acc_avg:59.50% Training_loss_avg:0.663346
Epoch:1 Step:3296 Training_loss:0.749145, Acc_avg:59.25% Training_loss_avg:0.664838
Epoch:1 Step:3304 Training_loss:0.569476, Acc_avg:59.00% Training_loss_avg:0.665095
Epoch:1 Step:3312 Training_loss:0.856433, Acc_avg:57.75% Training_loss_avg:0.673162
Epoch:1 Step:3320 Tr

52it [00:07,  6.62it/s]


Epoch:2 Step:0 Val_loss:0.654393, Val_Acc_avg:61.75%
Epoch:2 Step:8 Training_loss:0.542177, Acc_avg:57.83% Training_loss_avg:0.673770
Epoch:2 Step:16 Training_loss:0.677563, Acc_avg:57.83% Training_loss_avg:0.674027
Epoch:2 Step:24 Training_loss:0.704706, Acc_avg:58.33% Training_loss_avg:0.672853
Epoch:2 Step:32 Training_loss:0.804667, Acc_avg:57.83% Training_loss_avg:0.675349
Epoch:2 Step:40 Training_loss:0.614100, Acc_avg:58.58% Training_loss_avg:0.673703
Epoch:2 Step:48 Training_loss:0.619175, Acc_avg:59.33% Training_loss_avg:0.668538
Epoch:2 Step:56 Training_loss:0.571447, Acc_avg:60.08% Training_loss_avg:0.662516
Epoch:2 Step:64 Training_loss:0.789564, Acc_avg:60.33% Training_loss_avg:0.662017
Epoch:2 Step:72 Training_loss:0.656805, Acc_avg:60.08% Training_loss_avg:0.662660
Epoch:2 Step:80 Training_loss:0.504824, Acc_avg:60.58% Training_loss_avg:0.658873
Epoch:2 Step:88 Training_loss:0.584713, Acc_avg:61.08% Training_loss_avg:0.656293
Epoch:2 Step:96 Training_loss:0.539282, Acc_av

52it [00:07,  6.62it/s]


Epoch:2 Step:248 Val_loss:0.635818, Val_Acc_avg:61.75%
Epoch:2 Step:256 Training_loss:0.595680, Acc_avg:62.58% Training_loss_avg:0.640426
Epoch:2 Step:264 Training_loss:0.536917, Acc_avg:62.58% Training_loss_avg:0.637539
Epoch:2 Step:272 Training_loss:0.565752, Acc_avg:62.58% Training_loss_avg:0.636908
Epoch:2 Step:280 Training_loss:0.628722, Acc_avg:63.08% Training_loss_avg:0.635927
Epoch:2 Step:288 Training_loss:0.597868, Acc_avg:62.83% Training_loss_avg:0.637550
Epoch:2 Step:296 Training_loss:0.731787, Acc_avg:62.08% Training_loss_avg:0.640738
Epoch:2 Step:304 Training_loss:0.671802, Acc_avg:61.58% Training_loss_avg:0.643799
Epoch:2 Step:312 Training_loss:0.598100, Acc_avg:61.58% Training_loss_avg:0.644320
Epoch:2 Step:320 Training_loss:0.548143, Acc_avg:61.83% Training_loss_avg:0.643018
Epoch:2 Step:328 Training_loss:0.528531, Acc_avg:61.83% Training_loss_avg:0.642286
Epoch:2 Step:336 Training_loss:0.618673, Acc_avg:62.08% Training_loss_avg:0.639896
Epoch:2 Step:344 Training_loss:0

52it [00:07,  6.62it/s]


Epoch:2 Step:496 Val_loss:0.635619, Val_Acc_avg:61.50%
Epoch:2 Step:504 Training_loss:0.717300, Acc_avg:63.50% Training_loss_avg:0.629205
Epoch:2 Step:512 Training_loss:0.565189, Acc_avg:64.00% Training_loss_avg:0.625733
Epoch:2 Step:520 Training_loss:0.683338, Acc_avg:64.75% Training_loss_avg:0.623670
Epoch:2 Step:528 Training_loss:0.783611, Acc_avg:64.00% Training_loss_avg:0.629512
Epoch:2 Step:536 Training_loss:0.568765, Acc_avg:64.25% Training_loss_avg:0.627703
Epoch:2 Step:544 Training_loss:0.503986, Acc_avg:65.00% Training_loss_avg:0.624240
Epoch:2 Step:552 Training_loss:0.537460, Acc_avg:65.00% Training_loss_avg:0.621609
Epoch:2 Step:560 Training_loss:0.612757, Acc_avg:65.50% Training_loss_avg:0.619710
Epoch:2 Step:568 Training_loss:0.774887, Acc_avg:65.25% Training_loss_avg:0.623030
Epoch:2 Step:576 Training_loss:0.549942, Acc_avg:65.25% Training_loss_avg:0.622213
Epoch:2 Step:584 Training_loss:0.652434, Acc_avg:65.50% Training_loss_avg:0.621128
Epoch:2 Step:592 Training_loss:0

52it [00:07,  6.62it/s]


Epoch:2 Step:744 Val_loss:0.633114, Val_Acc_avg:64.75%
Epoch:2 Step:752 Training_loss:0.720455, Acc_avg:60.00% Training_loss_avg:0.649598
Epoch:2 Step:760 Training_loss:0.712386, Acc_avg:59.75% Training_loss_avg:0.651655
Epoch:2 Step:768 Training_loss:0.603705, Acc_avg:59.75% Training_loss_avg:0.652952
Epoch:2 Step:776 Training_loss:0.524621, Acc_avg:60.00% Training_loss_avg:0.650142
Epoch:2 Step:784 Training_loss:0.666337, Acc_avg:59.75% Training_loss_avg:0.651065
Epoch:2 Step:792 Training_loss:0.631471, Acc_avg:59.75% Training_loss_avg:0.649878
Epoch:2 Step:800 Training_loss:0.572295, Acc_avg:59.25% Training_loss_avg:0.651318
Epoch:2 Step:808 Training_loss:0.621339, Acc_avg:59.50% Training_loss_avg:0.648865
Epoch:2 Step:816 Training_loss:0.665209, Acc_avg:59.50% Training_loss_avg:0.646591
Epoch:2 Step:824 Training_loss:0.740947, Acc_avg:59.25% Training_loss_avg:0.645951
Epoch:2 Step:832 Training_loss:0.541258, Acc_avg:59.75% Training_loss_avg:0.643995
Epoch:2 Step:840 Training_loss:0

52it [00:07,  6.62it/s]


Epoch:2 Step:992 Val_loss:0.636439, Val_Acc_avg:61.75%
Epoch:2 Step:1000 Training_loss:0.779127, Acc_avg:59.25% Training_loss_avg:0.646546
Epoch:2 Step:1008 Training_loss:0.826879, Acc_avg:58.75% Training_loss_avg:0.649590
Epoch:2 Step:1016 Training_loss:0.567338, Acc_avg:59.50% Training_loss_avg:0.647257
Epoch:2 Step:1024 Training_loss:0.513807, Acc_avg:59.75% Training_loss_avg:0.645015
Epoch:2 Step:1032 Training_loss:0.540653, Acc_avg:60.75% Training_loss_avg:0.641660
Epoch:2 Step:1040 Training_loss:0.718484, Acc_avg:60.75% Training_loss_avg:0.643388
Epoch:2 Step:1048 Training_loss:0.609747, Acc_avg:61.25% Training_loss_avg:0.641607
Epoch:2 Step:1056 Training_loss:0.644459, Acc_avg:61.00% Training_loss_avg:0.642411
Epoch:2 Step:1064 Training_loss:0.617839, Acc_avg:61.50% Training_loss_avg:0.640520
Epoch:2 Step:1072 Training_loss:0.605789, Acc_avg:62.00% Training_loss_avg:0.638545
Epoch:2 Step:1080 Training_loss:0.572956, Acc_avg:61.50% Training_loss_avg:0.638608
Epoch:2 Step:1088 Tra

52it [00:07,  6.62it/s]


Epoch:2 Step:1240 Val_loss:0.634899, Val_Acc_avg:61.50%
Epoch:2 Step:1248 Training_loss:0.499887, Acc_avg:61.50% Training_loss_avg:0.642301
Epoch:2 Step:1256 Training_loss:0.491455, Acc_avg:62.25% Training_loss_avg:0.636842
Epoch:2 Step:1264 Training_loss:0.692815, Acc_avg:61.75% Training_loss_avg:0.637282
Epoch:2 Step:1272 Training_loss:0.616099, Acc_avg:62.00% Training_loss_avg:0.636349
Epoch:2 Step:1280 Training_loss:0.568260, Acc_avg:62.00% Training_loss_avg:0.633979
Epoch:2 Step:1288 Training_loss:0.759657, Acc_avg:62.00% Training_loss_avg:0.633993
Epoch:2 Step:1296 Training_loss:0.860276, Acc_avg:61.25% Training_loss_avg:0.636962
Epoch:2 Step:1304 Training_loss:0.624137, Acc_avg:61.25% Training_loss_avg:0.636762
Epoch:2 Step:1312 Training_loss:0.439042, Acc_avg:61.25% Training_loss_avg:0.634343
Epoch:2 Step:1320 Training_loss:0.718086, Acc_avg:61.00% Training_loss_avg:0.635423
Epoch:2 Step:1328 Training_loss:0.714113, Acc_avg:61.00% Training_loss_avg:0.637503
Epoch:2 Step:1336 Tr

52it [00:07,  6.62it/s]


Epoch:2 Step:1488 Val_loss:0.630270, Val_Acc_avg:62.75%
Epoch:2 Step:1496 Training_loss:0.691062, Acc_avg:60.25% Training_loss_avg:0.637321
Epoch:2 Step:1504 Training_loss:0.609797, Acc_avg:61.25% Training_loss_avg:0.633378
Epoch:2 Step:1512 Training_loss:0.640692, Acc_avg:61.25% Training_loss_avg:0.633292
Epoch:2 Step:1520 Training_loss:0.597864, Acc_avg:61.50% Training_loss_avg:0.631407
Epoch:2 Step:1528 Training_loss:0.645770, Acc_avg:61.50% Training_loss_avg:0.630564
Epoch:2 Step:1536 Training_loss:0.506555, Acc_avg:61.75% Training_loss_avg:0.626584
Epoch:2 Step:1544 Training_loss:0.570329, Acc_avg:61.75% Training_loss_avg:0.628117
Epoch:2 Step:1552 Training_loss:0.835489, Acc_avg:61.50% Training_loss_avg:0.631269
Epoch:2 Step:1560 Training_loss:0.713222, Acc_avg:61.25% Training_loss_avg:0.633316
Epoch:2 Step:1568 Training_loss:0.772690, Acc_avg:60.25% Training_loss_avg:0.635082
Epoch:2 Step:1576 Training_loss:0.703606, Acc_avg:59.75% Training_loss_avg:0.636209
Epoch:2 Step:1584 Tr

52it [00:07,  6.62it/s]


Epoch:2 Step:1736 Val_loss:0.637018, Val_Acc_avg:62.25%
Epoch:2 Step:1744 Training_loss:0.647248, Acc_avg:60.25% Training_loss_avg:0.642448
Epoch:2 Step:1752 Training_loss:0.916398, Acc_avg:60.25% Training_loss_avg:0.643968
Epoch:2 Step:1760 Training_loss:0.710129, Acc_avg:59.50% Training_loss_avg:0.648332
Epoch:2 Step:1768 Training_loss:0.475910, Acc_avg:60.25% Training_loss_avg:0.643037
Epoch:2 Step:1776 Training_loss:0.522321, Acc_avg:60.25% Training_loss_avg:0.640651
Epoch:2 Step:1784 Training_loss:0.699729, Acc_avg:59.00% Training_loss_avg:0.645296
Epoch:2 Step:1792 Training_loss:0.486472, Acc_avg:59.00% Training_loss_avg:0.645617
Epoch:2 Step:1800 Training_loss:0.713784, Acc_avg:58.50% Training_loss_avg:0.646959
Epoch:2 Step:1808 Training_loss:0.696223, Acc_avg:58.50% Training_loss_avg:0.647693
Epoch:2 Step:1816 Training_loss:0.482763, Acc_avg:58.25% Training_loss_avg:0.646371
Epoch:2 Step:1824 Training_loss:0.634123, Acc_avg:58.25% Training_loss_avg:0.645982
Epoch:2 Step:1832 Tr

52it [00:07,  6.62it/s]


Epoch:2 Step:1984 Val_loss:0.640218, Val_Acc_avg:62.25%
Epoch:2 Step:1992 Training_loss:0.725153, Acc_avg:59.75% Training_loss_avg:0.653477
Epoch:2 Step:2000 Training_loss:0.442473, Acc_avg:60.50% Training_loss_avg:0.649533
Epoch:2 Step:2008 Training_loss:0.747229, Acc_avg:60.25% Training_loss_avg:0.649876
Epoch:2 Step:2016 Training_loss:0.599570, Acc_avg:61.00% Training_loss_avg:0.648921
Epoch:2 Step:2024 Training_loss:0.574756, Acc_avg:61.25% Training_loss_avg:0.647713
Epoch:2 Step:2032 Training_loss:0.592362, Acc_avg:61.00% Training_loss_avg:0.647708
Epoch:2 Step:2040 Training_loss:0.653746, Acc_avg:60.25% Training_loss_avg:0.647375
Epoch:2 Step:2048 Training_loss:0.734933, Acc_avg:59.75% Training_loss_avg:0.649012
Epoch:2 Step:2056 Training_loss:0.587612, Acc_avg:60.50% Training_loss_avg:0.644673
Epoch:2 Step:2064 Training_loss:0.451542, Acc_avg:61.75% Training_loss_avg:0.639303
Epoch:2 Step:2072 Training_loss:0.603801, Acc_avg:61.75% Training_loss_avg:0.640396
Epoch:2 Step:2080 Tr

52it [00:07,  6.62it/s]


Epoch:2 Step:2232 Val_loss:0.641910, Val_Acc_avg:62.25%
Epoch:2 Step:2240 Training_loss:0.805815, Acc_avg:64.00% Training_loss_avg:0.626597
Epoch:2 Step:2248 Training_loss:0.673918, Acc_avg:63.50% Training_loss_avg:0.628928
Epoch:2 Step:2256 Training_loss:0.584766, Acc_avg:64.50% Training_loss_avg:0.625242
Epoch:2 Step:2264 Training_loss:0.572942, Acc_avg:64.50% Training_loss_avg:0.624283
Epoch:2 Step:2272 Training_loss:0.612126, Acc_avg:64.25% Training_loss_avg:0.624476
Epoch:2 Step:2280 Training_loss:0.624116, Acc_avg:64.00% Training_loss_avg:0.626209
Epoch:2 Step:2288 Training_loss:0.679969, Acc_avg:63.75% Training_loss_avg:0.626526
Epoch:2 Step:2296 Training_loss:0.613211, Acc_avg:63.50% Training_loss_avg:0.624907
Epoch:2 Step:2304 Training_loss:0.657522, Acc_avg:63.50% Training_loss_avg:0.625348
Epoch:2 Step:2312 Training_loss:0.507125, Acc_avg:64.00% Training_loss_avg:0.623777
Epoch:2 Step:2320 Training_loss:0.892034, Acc_avg:63.25% Training_loss_avg:0.628002
Epoch:2 Step:2328 Tr

52it [00:07,  6.62it/s]


Epoch:2 Step:2480 Val_loss:0.644104, Val_Acc_avg:60.50%
Epoch:2 Step:2488 Training_loss:0.674552, Acc_avg:65.25% Training_loss_avg:0.623078
Epoch:2 Step:2496 Training_loss:0.601170, Acc_avg:65.25% Training_loss_avg:0.622624
Epoch:2 Step:2504 Training_loss:0.572080, Acc_avg:65.50% Training_loss_avg:0.622926
Epoch:2 Step:2512 Training_loss:0.632787, Acc_avg:65.50% Training_loss_avg:0.622007
Epoch:2 Step:2520 Training_loss:0.727030, Acc_avg:64.50% Training_loss_avg:0.626677
Epoch:2 Step:2528 Training_loss:0.783527, Acc_avg:64.00% Training_loss_avg:0.630985
Epoch:2 Step:2536 Training_loss:0.638739, Acc_avg:64.00% Training_loss_avg:0.632499
Epoch:2 Step:2544 Training_loss:0.721622, Acc_avg:63.75% Training_loss_avg:0.633840
Epoch:2 Step:2552 Training_loss:0.537650, Acc_avg:64.00% Training_loss_avg:0.631090
Epoch:2 Step:2560 Training_loss:0.614650, Acc_avg:64.75% Training_loss_avg:0.630239
Epoch:2 Step:2568 Training_loss:0.506977, Acc_avg:65.25% Training_loss_avg:0.631563
Epoch:2 Step:2576 Tr

52it [00:07,  6.62it/s]


Epoch:2 Step:2728 Val_loss:0.639041, Val_Acc_avg:63.00%
Epoch:2 Step:2736 Training_loss:0.631098, Acc_avg:64.00% Training_loss_avg:0.647570
Epoch:2 Step:2744 Training_loss:0.843473, Acc_avg:63.25% Training_loss_avg:0.651978
Epoch:2 Step:2752 Training_loss:0.490703, Acc_avg:63.75% Training_loss_avg:0.646951
Epoch:2 Step:2760 Training_loss:0.626921, Acc_avg:63.25% Training_loss_avg:0.647870
Epoch:2 Step:2768 Training_loss:0.624423, Acc_avg:63.00% Training_loss_avg:0.649484
Epoch:2 Step:2776 Training_loss:0.464071, Acc_avg:62.75% Training_loss_avg:0.646704
Epoch:2 Step:2784 Training_loss:0.750012, Acc_avg:61.75% Training_loss_avg:0.648525
Epoch:2 Step:2792 Training_loss:0.585161, Acc_avg:61.25% Training_loss_avg:0.648696
Epoch:2 Step:2800 Training_loss:0.645837, Acc_avg:61.50% Training_loss_avg:0.647827
Epoch:2 Step:2808 Training_loss:0.653974, Acc_avg:62.00% Training_loss_avg:0.648296
Epoch:2 Step:2816 Training_loss:0.659840, Acc_avg:61.25% Training_loss_avg:0.649777
Epoch:2 Step:2824 Tr

52it [00:07,  6.62it/s]


Epoch:2 Step:2976 Val_loss:0.634350, Val_Acc_avg:63.25%
Epoch:2 Step:2984 Training_loss:0.733336, Acc_avg:60.25% Training_loss_avg:0.645185
Epoch:2 Step:2992 Training_loss:0.750122, Acc_avg:60.25% Training_loss_avg:0.646071
Epoch:2 Step:3000 Training_loss:0.569180, Acc_avg:60.50% Training_loss_avg:0.640483
Epoch:2 Step:3008 Training_loss:0.798335, Acc_avg:60.00% Training_loss_avg:0.644659
Epoch:2 Step:3016 Training_loss:0.647649, Acc_avg:59.75% Training_loss_avg:0.645333
Epoch:2 Step:3024 Training_loss:0.572712, Acc_avg:60.00% Training_loss_avg:0.641426
Epoch:2 Step:3032 Training_loss:0.789505, Acc_avg:60.00% Training_loss_avg:0.639676
Epoch:2 Step:3040 Training_loss:0.667893, Acc_avg:59.00% Training_loss_avg:0.642500
Epoch:2 Step:3048 Training_loss:0.523515, Acc_avg:59.50% Training_loss_avg:0.638838
Epoch:2 Step:3056 Training_loss:0.576206, Acc_avg:59.25% Training_loss_avg:0.637428
Epoch:2 Step:3064 Training_loss:0.577919, Acc_avg:58.50% Training_loss_avg:0.639551
Epoch:2 Step:3072 Tr

52it [00:07,  6.62it/s]


Epoch:2 Step:3224 Val_loss:0.631627, Val_Acc_avg:61.50%
Epoch:2 Step:3232 Training_loss:0.604654, Acc_avg:61.50% Training_loss_avg:0.623803
Epoch:2 Step:3240 Training_loss:0.642367, Acc_avg:61.75% Training_loss_avg:0.626188
Epoch:2 Step:3248 Training_loss:0.568574, Acc_avg:62.00% Training_loss_avg:0.626120
Epoch:2 Step:3256 Training_loss:0.591219, Acc_avg:62.50% Training_loss_avg:0.625075
Epoch:2 Step:3264 Training_loss:0.756911, Acc_avg:61.75% Training_loss_avg:0.628288
Epoch:2 Step:3272 Training_loss:0.664165, Acc_avg:61.75% Training_loss_avg:0.627485
Epoch:2 Step:3280 Training_loss:0.941960, Acc_avg:61.75% Training_loss_avg:0.630362
Epoch:2 Step:3288 Training_loss:0.631004, Acc_avg:61.50% Training_loss_avg:0.631511
Epoch:2 Step:3296 Training_loss:0.656652, Acc_avg:61.25% Training_loss_avg:0.633372
Epoch:2 Step:3304 Training_loss:0.515700, Acc_avg:61.25% Training_loss_avg:0.630917
Epoch:2 Step:3312 Training_loss:0.603435, Acc_avg:61.75% Training_loss_avg:0.628927
Epoch:2 Step:3320 Tr

52it [00:07,  6.62it/s]


Epoch:3 Step:0 Val_loss:0.632898, Val_Acc_avg:62.75%
Epoch:3 Step:8 Training_loss:0.654311, Acc_avg:61.75% Training_loss_avg:0.627629
Epoch:3 Step:16 Training_loss:0.621090, Acc_avg:61.25% Training_loss_avg:0.628999
Epoch:3 Step:24 Training_loss:0.570725, Acc_avg:61.25% Training_loss_avg:0.629796
Epoch:3 Step:32 Training_loss:0.630980, Acc_avg:61.00% Training_loss_avg:0.629106
Epoch:3 Step:40 Training_loss:0.599568, Acc_avg:60.75% Training_loss_avg:0.628622
Epoch:3 Step:48 Training_loss:0.500842, Acc_avg:61.50% Training_loss_avg:0.623972
Epoch:3 Step:56 Training_loss:0.473249, Acc_avg:62.25% Training_loss_avg:0.618435
Epoch:3 Step:64 Training_loss:0.693761, Acc_avg:62.00% Training_loss_avg:0.620927
Epoch:3 Step:72 Training_loss:0.447022, Acc_avg:63.00% Training_loss_avg:0.613900
Epoch:3 Step:80 Training_loss:0.546732, Acc_avg:63.50% Training_loss_avg:0.611882
Epoch:3 Step:88 Training_loss:0.612959, Acc_avg:63.75% Training_loss_avg:0.612687
Epoch:3 Step:96 Training_loss:0.484075, Acc_av

52it [00:07,  6.61it/s]


Epoch:3 Step:248 Val_loss:0.625752, Val_Acc_avg:61.75%
Epoch:3 Step:256 Training_loss:0.686978, Acc_avg:61.50% Training_loss_avg:0.632023
Epoch:3 Step:264 Training_loss:0.696273, Acc_avg:60.75% Training_loss_avg:0.635625
Epoch:3 Step:272 Training_loss:0.715163, Acc_avg:60.50% Training_loss_avg:0.636135
Epoch:3 Step:280 Training_loss:0.612699, Acc_avg:60.75% Training_loss_avg:0.632198
Epoch:3 Step:288 Training_loss:0.623709, Acc_avg:60.75% Training_loss_avg:0.633563
Epoch:3 Step:296 Training_loss:0.602969, Acc_avg:60.75% Training_loss_avg:0.633530
Epoch:3 Step:304 Training_loss:0.572624, Acc_avg:61.00% Training_loss_avg:0.632135
Epoch:3 Step:312 Training_loss:0.619624, Acc_avg:61.00% Training_loss_avg:0.633156
Epoch:3 Step:320 Training_loss:0.589136, Acc_avg:61.25% Training_loss_avg:0.633114
Epoch:3 Step:328 Training_loss:0.562564, Acc_avg:61.25% Training_loss_avg:0.629227
Epoch:3 Step:336 Training_loss:0.593219, Acc_avg:61.25% Training_loss_avg:0.627808
Epoch:3 Step:344 Training_loss:0

52it [00:07,  6.62it/s]


Epoch:3 Step:496 Val_loss:0.623682, Val_Acc_avg:62.75%
Epoch:3 Step:504 Training_loss:0.693850, Acc_avg:62.00% Training_loss_avg:0.627581
Epoch:3 Step:512 Training_loss:0.541879, Acc_avg:63.00% Training_loss_avg:0.622530
Epoch:3 Step:520 Training_loss:0.529032, Acc_avg:63.00% Training_loss_avg:0.619998
Epoch:3 Step:528 Training_loss:0.738428, Acc_avg:62.50% Training_loss_avg:0.622639
Epoch:3 Step:536 Training_loss:0.699831, Acc_avg:62.25% Training_loss_avg:0.624993
Epoch:3 Step:544 Training_loss:0.611498, Acc_avg:62.50% Training_loss_avg:0.626421
Epoch:3 Step:552 Training_loss:0.474556, Acc_avg:63.25% Training_loss_avg:0.621214
Epoch:3 Step:560 Training_loss:0.519883, Acc_avg:63.25% Training_loss_avg:0.619539
Epoch:3 Step:568 Training_loss:0.594889, Acc_avg:63.25% Training_loss_avg:0.617948
Epoch:3 Step:576 Training_loss:0.671547, Acc_avg:63.00% Training_loss_avg:0.620460
Epoch:3 Step:584 Training_loss:0.683951, Acc_avg:63.25% Training_loss_avg:0.618034
Epoch:3 Step:592 Training_loss:0

52it [00:07,  6.62it/s]


Epoch:3 Step:744 Val_loss:0.628049, Val_Acc_avg:62.50%
Epoch:3 Step:752 Training_loss:0.588858, Acc_avg:65.75% Training_loss_avg:0.606188
Epoch:3 Step:760 Training_loss:0.725496, Acc_avg:65.50% Training_loss_avg:0.609788
Epoch:3 Step:768 Training_loss:0.646035, Acc_avg:65.50% Training_loss_avg:0.611622
Epoch:3 Step:776 Training_loss:0.506639, Acc_avg:65.50% Training_loss_avg:0.612376
Epoch:3 Step:784 Training_loss:0.557744, Acc_avg:65.75% Training_loss_avg:0.610597
Epoch:3 Step:792 Training_loss:0.586093, Acc_avg:65.50% Training_loss_avg:0.612540
Epoch:3 Step:800 Training_loss:0.445383, Acc_avg:66.25% Training_loss_avg:0.607859
Epoch:3 Step:808 Training_loss:0.676959, Acc_avg:65.50% Training_loss_avg:0.608862
Epoch:3 Step:816 Training_loss:0.535113, Acc_avg:64.75% Training_loss_avg:0.610478
Epoch:3 Step:824 Training_loss:0.795711, Acc_avg:64.25% Training_loss_avg:0.615878
Epoch:3 Step:832 Training_loss:0.713027, Acc_avg:63.50% Training_loss_avg:0.620048
Epoch:3 Step:840 Training_loss:0

52it [00:07,  6.62it/s]


Epoch:3 Step:992 Val_loss:0.640626, Val_Acc_avg:62.75%
Epoch:3 Step:1000 Training_loss:0.475225, Acc_avg:67.75% Training_loss_avg:0.604428
Epoch:3 Step:1008 Training_loss:0.819981, Acc_avg:68.00% Training_loss_avg:0.606139
Epoch:3 Step:1016 Training_loss:0.546923, Acc_avg:67.50% Training_loss_avg:0.608880
Epoch:3 Step:1024 Training_loss:0.529002, Acc_avg:67.25% Training_loss_avg:0.606501
Epoch:3 Step:1032 Training_loss:0.756147, Acc_avg:66.75% Training_loss_avg:0.610961
Epoch:3 Step:1040 Training_loss:0.736344, Acc_avg:66.75% Training_loss_avg:0.611026
Epoch:3 Step:1048 Training_loss:0.581135, Acc_avg:66.75% Training_loss_avg:0.611121
Epoch:3 Step:1056 Training_loss:0.700226, Acc_avg:66.25% Training_loss_avg:0.615339
Epoch:3 Step:1064 Training_loss:0.627260, Acc_avg:65.50% Training_loss_avg:0.618319
Epoch:3 Step:1072 Training_loss:0.655489, Acc_avg:65.25% Training_loss_avg:0.620552
Epoch:3 Step:1080 Training_loss:0.687334, Acc_avg:65.00% Training_loss_avg:0.621706
Epoch:3 Step:1088 Tra

52it [00:07,  6.62it/s]


Epoch:3 Step:1240 Val_loss:0.620663, Val_Acc_avg:61.25%
Epoch:3 Step:1248 Training_loss:0.464076, Acc_avg:66.00% Training_loss_avg:0.606105
Epoch:3 Step:1256 Training_loss:0.652273, Acc_avg:65.00% Training_loss_avg:0.609752
Epoch:3 Step:1264 Training_loss:0.516036, Acc_avg:65.75% Training_loss_avg:0.603893
Epoch:3 Step:1272 Training_loss:0.606698, Acc_avg:65.25% Training_loss_avg:0.605993
Epoch:3 Step:1280 Training_loss:0.759032, Acc_avg:65.25% Training_loss_avg:0.610592
Epoch:3 Step:1288 Training_loss:0.712068, Acc_avg:65.00% Training_loss_avg:0.614099
Epoch:3 Step:1296 Training_loss:0.700455, Acc_avg:64.75% Training_loss_avg:0.617825
Epoch:3 Step:1304 Training_loss:0.672019, Acc_avg:64.50% Training_loss_avg:0.621381
Epoch:3 Step:1312 Training_loss:0.731191, Acc_avg:64.00% Training_loss_avg:0.620224
Epoch:3 Step:1320 Training_loss:0.462903, Acc_avg:64.50% Training_loss_avg:0.613131
Epoch:3 Step:1328 Training_loss:0.653639, Acc_avg:65.00% Training_loss_avg:0.606913
Epoch:3 Step:1336 Tr

52it [00:07,  6.62it/s]


Epoch:3 Step:1488 Val_loss:0.641312, Val_Acc_avg:63.50%
Epoch:3 Step:1496 Training_loss:0.703820, Acc_avg:62.25% Training_loss_avg:0.622059
Epoch:3 Step:1504 Training_loss:0.565841, Acc_avg:61.75% Training_loss_avg:0.627098
Epoch:3 Step:1512 Training_loss:0.509650, Acc_avg:62.00% Training_loss_avg:0.624809
Epoch:3 Step:1520 Training_loss:0.544416, Acc_avg:61.50% Training_loss_avg:0.626616
Epoch:3 Step:1528 Training_loss:0.660052, Acc_avg:61.25% Training_loss_avg:0.627286
Epoch:3 Step:1536 Training_loss:0.718482, Acc_avg:60.75% Training_loss_avg:0.630560
Epoch:3 Step:1544 Training_loss:0.682545, Acc_avg:61.25% Training_loss_avg:0.630898
Epoch:3 Step:1552 Training_loss:0.588647, Acc_avg:61.50% Training_loss_avg:0.629295
Epoch:3 Step:1560 Training_loss:0.670157, Acc_avg:61.00% Training_loss_avg:0.630719
Epoch:3 Step:1568 Training_loss:0.637835, Acc_avg:60.75% Training_loss_avg:0.631933
Epoch:3 Step:1576 Training_loss:0.725101, Acc_avg:59.75% Training_loss_avg:0.636966
Epoch:3 Step:1584 Tr

52it [00:07,  6.62it/s]


Epoch:3 Step:1736 Val_loss:0.668813, Val_Acc_avg:62.75%
Epoch:3 Step:1744 Training_loss:0.564488, Acc_avg:62.00% Training_loss_avg:0.628534
Epoch:3 Step:1752 Training_loss:0.898176, Acc_avg:61.75% Training_loss_avg:0.631562
Epoch:3 Step:1760 Training_loss:0.707327, Acc_avg:61.25% Training_loss_avg:0.636341
Epoch:3 Step:1768 Training_loss:0.366832, Acc_avg:61.75% Training_loss_avg:0.631447
Epoch:3 Step:1776 Training_loss:0.968867, Acc_avg:60.75% Training_loss_avg:0.640412
Epoch:3 Step:1784 Training_loss:0.893717, Acc_avg:59.25% Training_loss_avg:0.649086
Epoch:3 Step:1792 Training_loss:0.783520, Acc_avg:59.50% Training_loss_avg:0.650121
Epoch:3 Step:1800 Training_loss:0.547038, Acc_avg:59.75% Training_loss_avg:0.645748
Epoch:3 Step:1808 Training_loss:0.792233, Acc_avg:59.75% Training_loss_avg:0.648955
Epoch:3 Step:1816 Training_loss:0.524424, Acc_avg:59.50% Training_loss_avg:0.651640
Epoch:3 Step:1824 Training_loss:0.656956, Acc_avg:59.75% Training_loss_avg:0.644594
Epoch:3 Step:1832 Tr

52it [00:07,  6.62it/s]


Epoch:3 Step:1984 Val_loss:0.623052, Val_Acc_avg:63.25%
Epoch:3 Step:1992 Training_loss:0.942231, Acc_avg:65.75% Training_loss_avg:0.625553
Epoch:3 Step:2000 Training_loss:0.622797, Acc_avg:65.75% Training_loss_avg:0.626186
Epoch:3 Step:2008 Training_loss:0.656194, Acc_avg:65.50% Training_loss_avg:0.627085
Epoch:3 Step:2016 Training_loss:0.644116, Acc_avg:65.50% Training_loss_avg:0.627707
Epoch:3 Step:2024 Training_loss:0.691104, Acc_avg:65.00% Training_loss_avg:0.629760
Epoch:3 Step:2032 Training_loss:0.556594, Acc_avg:65.25% Training_loss_avg:0.627612
Epoch:3 Step:2040 Training_loss:0.618991, Acc_avg:65.50% Training_loss_avg:0.626127
Epoch:3 Step:2048 Training_loss:0.727492, Acc_avg:65.75% Training_loss_avg:0.623999
Epoch:3 Step:2056 Training_loss:0.621647, Acc_avg:65.50% Training_loss_avg:0.625470
Epoch:3 Step:2064 Training_loss:0.536756, Acc_avg:65.75% Training_loss_avg:0.622284
Epoch:3 Step:2072 Training_loss:0.539040, Acc_avg:66.25% Training_loss_avg:0.622520
Epoch:3 Step:2080 Tr

52it [00:07,  6.60it/s]


Epoch:3 Step:2232 Val_loss:0.626805, Val_Acc_avg:59.50%
Epoch:3 Step:2240 Training_loss:0.684566, Acc_avg:64.75% Training_loss_avg:0.617305
Epoch:3 Step:2248 Training_loss:0.542086, Acc_avg:65.00% Training_loss_avg:0.616094
Epoch:3 Step:2256 Training_loss:0.470153, Acc_avg:64.75% Training_loss_avg:0.613747
Epoch:3 Step:2264 Training_loss:0.500000, Acc_avg:65.25% Training_loss_avg:0.608119
Epoch:3 Step:2272 Training_loss:0.519578, Acc_avg:65.25% Training_loss_avg:0.608162
Epoch:3 Step:2280 Training_loss:0.458381, Acc_avg:65.25% Training_loss_avg:0.606817
Epoch:3 Step:2288 Training_loss:0.800815, Acc_avg:64.75% Training_loss_avg:0.609975
Epoch:3 Step:2296 Training_loss:0.893458, Acc_avg:64.00% Training_loss_avg:0.616009
Epoch:3 Step:2304 Training_loss:0.727364, Acc_avg:63.75% Training_loss_avg:0.617334
Epoch:3 Step:2312 Training_loss:0.679674, Acc_avg:64.50% Training_loss_avg:0.616276
Epoch:3 Step:2320 Training_loss:0.560269, Acc_avg:65.00% Training_loss_avg:0.613892
Epoch:3 Step:2328 Tr

52it [00:07,  6.62it/s]


Epoch:3 Step:2480 Val_loss:0.635495, Val_Acc_avg:63.00%
Epoch:3 Step:2488 Training_loss:0.661439, Acc_avg:63.25% Training_loss_avg:0.632959
Epoch:3 Step:2496 Training_loss:0.685832, Acc_avg:63.25% Training_loss_avg:0.634187
Epoch:3 Step:2504 Training_loss:0.489926, Acc_avg:63.50% Training_loss_avg:0.633874
Epoch:3 Step:2512 Training_loss:0.753142, Acc_avg:63.50% Training_loss_avg:0.632964
Epoch:3 Step:2520 Training_loss:0.521730, Acc_avg:63.50% Training_loss_avg:0.630436
Epoch:3 Step:2528 Training_loss:0.616990, Acc_avg:63.50% Training_loss_avg:0.631520
Epoch:3 Step:2536 Training_loss:0.679427, Acc_avg:63.50% Training_loss_avg:0.633358
Epoch:3 Step:2544 Training_loss:0.436694, Acc_avg:64.00% Training_loss_avg:0.631004
Epoch:3 Step:2552 Training_loss:0.531656, Acc_avg:64.75% Training_loss_avg:0.628668
Epoch:3 Step:2560 Training_loss:0.519389, Acc_avg:65.00% Training_loss_avg:0.628777
Epoch:3 Step:2568 Training_loss:0.605470, Acc_avg:64.75% Training_loss_avg:0.630300
Epoch:3 Step:2576 Tr

52it [00:07,  6.62it/s]


Epoch:3 Step:2728 Val_loss:0.622393, Val_Acc_avg:64.25%
Epoch:3 Step:2736 Training_loss:0.645847, Acc_avg:66.75% Training_loss_avg:0.612522
Epoch:3 Step:2744 Training_loss:0.612397, Acc_avg:66.50% Training_loss_avg:0.614875
Epoch:3 Step:2752 Training_loss:0.548646, Acc_avg:67.50% Training_loss_avg:0.607471
Epoch:3 Step:2760 Training_loss:0.378167, Acc_avg:67.25% Training_loss_avg:0.609451
Epoch:3 Step:2768 Training_loss:0.566101, Acc_avg:67.50% Training_loss_avg:0.606014
Epoch:3 Step:2776 Training_loss:0.630656, Acc_avg:68.50% Training_loss_avg:0.595339
Epoch:3 Step:2784 Training_loss:0.705647, Acc_avg:67.25% Training_loss_avg:0.602207
Epoch:3 Step:2792 Training_loss:0.462280, Acc_avg:67.50% Training_loss_avg:0.596387
Epoch:3 Step:2800 Training_loss:0.526724, Acc_avg:68.25% Training_loss_avg:0.590875
Epoch:3 Step:2808 Training_loss:0.690455, Acc_avg:67.50% Training_loss_avg:0.597081
Epoch:3 Step:2816 Training_loss:0.521896, Acc_avg:67.75% Training_loss_avg:0.594317
Epoch:3 Step:2824 Tr

52it [00:07,  6.62it/s]


Epoch:3 Step:2976 Val_loss:0.628638, Val_Acc_avg:64.00%
Epoch:3 Step:2984 Training_loss:0.528652, Acc_avg:66.75% Training_loss_avg:0.585363
Epoch:3 Step:2992 Training_loss:0.520152, Acc_avg:66.25% Training_loss_avg:0.586251
Epoch:3 Step:3000 Training_loss:0.849067, Acc_avg:65.50% Training_loss_avg:0.591412
Epoch:3 Step:3008 Training_loss:0.477810, Acc_avg:66.00% Training_loss_avg:0.587576
Epoch:3 Step:3016 Training_loss:0.596971, Acc_avg:65.50% Training_loss_avg:0.587009
Epoch:3 Step:3024 Training_loss:0.735676, Acc_avg:64.50% Training_loss_avg:0.593221
Epoch:3 Step:3032 Training_loss:0.678267, Acc_avg:64.50% Training_loss_avg:0.593535
Epoch:3 Step:3040 Training_loss:0.707807, Acc_avg:63.75% Training_loss_avg:0.600086
Epoch:3 Step:3048 Training_loss:0.576894, Acc_avg:63.50% Training_loss_avg:0.599229
Epoch:3 Step:3056 Training_loss:0.491568, Acc_avg:64.25% Training_loss_avg:0.594452
Epoch:3 Step:3064 Training_loss:0.609223, Acc_avg:63.75% Training_loss_avg:0.597362
Epoch:3 Step:3072 Tr

52it [00:07,  6.62it/s]


Epoch:3 Step:3224 Val_loss:0.662113, Val_Acc_avg:63.75%
Epoch:3 Step:3232 Training_loss:0.426662, Acc_avg:64.00% Training_loss_avg:0.594707
Epoch:3 Step:3240 Training_loss:0.316100, Acc_avg:64.50% Training_loss_avg:0.589940
Epoch:3 Step:3248 Training_loss:0.771292, Acc_avg:64.25% Training_loss_avg:0.596331
Epoch:3 Step:3256 Training_loss:0.526655, Acc_avg:64.00% Training_loss_avg:0.595019
Epoch:3 Step:3264 Training_loss:0.551333, Acc_avg:64.00% Training_loss_avg:0.593727
Epoch:3 Step:3272 Training_loss:0.614036, Acc_avg:63.50% Training_loss_avg:0.594474
Epoch:3 Step:3280 Training_loss:0.458739, Acc_avg:63.00% Training_loss_avg:0.595044
Epoch:3 Step:3288 Training_loss:0.850949, Acc_avg:63.00% Training_loss_avg:0.599520
Epoch:3 Step:3296 Training_loss:0.822921, Acc_avg:62.25% Training_loss_avg:0.605311
Epoch:3 Step:3304 Training_loss:0.425057, Acc_avg:62.75% Training_loss_avg:0.601249
Epoch:3 Step:3312 Training_loss:0.639570, Acc_avg:62.50% Training_loss_avg:0.604482
Epoch:3 Step:3320 Tr

52it [00:07,  6.62it/s]


Epoch:4 Step:0 Val_loss:0.624063, Val_Acc_avg:63.00%
Epoch:4 Step:8 Training_loss:0.567690, Acc_avg:62.42% Training_loss_avg:0.604519
Epoch:4 Step:16 Training_loss:0.615633, Acc_avg:62.67% Training_loss_avg:0.603444
Epoch:4 Step:24 Training_loss:0.586590, Acc_avg:62.67% Training_loss_avg:0.603422
Epoch:4 Step:32 Training_loss:0.637061, Acc_avg:63.67% Training_loss_avg:0.598163
Epoch:4 Step:40 Training_loss:0.418262, Acc_avg:64.42% Training_loss_avg:0.595933
Epoch:4 Step:48 Training_loss:0.790235, Acc_avg:63.67% Training_loss_avg:0.601164
Epoch:4 Step:56 Training_loss:0.595679, Acc_avg:63.42% Training_loss_avg:0.602675
Epoch:4 Step:64 Training_loss:0.568634, Acc_avg:64.17% Training_loss_avg:0.597066
Epoch:4 Step:72 Training_loss:0.628561, Acc_avg:63.92% Training_loss_avg:0.600081
Epoch:4 Step:80 Training_loss:0.576987, Acc_avg:64.42% Training_loss_avg:0.599682
Epoch:4 Step:88 Training_loss:0.674713, Acc_avg:64.92% Training_loss_avg:0.598462
Epoch:4 Step:96 Training_loss:0.481272, Acc_av

52it [00:07,  6.62it/s]


Epoch:4 Step:248 Val_loss:0.652081, Val_Acc_avg:63.25%
Epoch:4 Step:256 Training_loss:0.513857, Acc_avg:68.17% Training_loss_avg:0.600031
Epoch:4 Step:264 Training_loss:0.486056, Acc_avg:68.42% Training_loss_avg:0.599346
Epoch:4 Step:272 Training_loss:0.677568, Acc_avg:68.42% Training_loss_avg:0.601004
Epoch:4 Step:280 Training_loss:0.654835, Acc_avg:69.17% Training_loss_avg:0.592812
Epoch:4 Step:288 Training_loss:0.437692, Acc_avg:70.17% Training_loss_avg:0.586051
Epoch:4 Step:296 Training_loss:0.664416, Acc_avg:69.67% Training_loss_avg:0.590806
Epoch:4 Step:304 Training_loss:0.581507, Acc_avg:68.92% Training_loss_avg:0.596114
Epoch:4 Step:312 Training_loss:0.660955, Acc_avg:69.42% Training_loss_avg:0.593907
Epoch:4 Step:320 Training_loss:0.622262, Acc_avg:69.17% Training_loss_avg:0.595819
Epoch:4 Step:328 Training_loss:0.309129, Acc_avg:69.67% Training_loss_avg:0.590975
Epoch:4 Step:336 Training_loss:0.772058, Acc_avg:69.42% Training_loss_avg:0.594136
Epoch:4 Step:344 Training_loss:0

52it [00:07,  6.61it/s]


Epoch:4 Step:496 Val_loss:0.643882, Val_Acc_avg:64.50%
Epoch:4 Step:504 Training_loss:0.941805, Acc_avg:70.00% Training_loss_avg:0.592552
Epoch:4 Step:512 Training_loss:0.352170, Acc_avg:70.25% Training_loss_avg:0.587808
Epoch:4 Step:520 Training_loss:0.776538, Acc_avg:69.50% Training_loss_avg:0.592935
Epoch:4 Step:528 Training_loss:0.765349, Acc_avg:69.25% Training_loss_avg:0.596155
Epoch:4 Step:536 Training_loss:0.727524, Acc_avg:68.75% Training_loss_avg:0.601590
Epoch:4 Step:544 Training_loss:0.445234, Acc_avg:68.75% Training_loss_avg:0.602180
Epoch:4 Step:552 Training_loss:0.489711, Acc_avg:69.00% Training_loss_avg:0.603593
Epoch:4 Step:560 Training_loss:0.597936, Acc_avg:68.75% Training_loss_avg:0.605905
Epoch:4 Step:568 Training_loss:0.759575, Acc_avg:68.50% Training_loss_avg:0.612964
Epoch:4 Step:576 Training_loss:0.467463, Acc_avg:69.00% Training_loss_avg:0.612187
Epoch:4 Step:584 Training_loss:0.469202, Acc_avg:69.00% Training_loss_avg:0.611101
Epoch:4 Step:592 Training_loss:0

52it [00:07,  6.62it/s]


Epoch:4 Step:744 Val_loss:0.613862, Val_Acc_avg:65.25%
Epoch:4 Step:752 Training_loss:0.692034, Acc_avg:69.50% Training_loss_avg:0.586872
Epoch:4 Step:760 Training_loss:0.347074, Acc_avg:69.75% Training_loss_avg:0.584697
Epoch:4 Step:768 Training_loss:0.459617, Acc_avg:69.25% Training_loss_avg:0.583400
Epoch:4 Step:776 Training_loss:0.521974, Acc_avg:69.00% Training_loss_avg:0.584233
Epoch:4 Step:784 Training_loss:0.597866, Acc_avg:69.25% Training_loss_avg:0.583309
Epoch:4 Step:792 Training_loss:0.667835, Acc_avg:68.25% Training_loss_avg:0.586426
Epoch:4 Step:800 Training_loss:0.688662, Acc_avg:67.25% Training_loss_avg:0.589989
Epoch:4 Step:808 Training_loss:0.347460, Acc_avg:68.00% Training_loss_avg:0.583023
Epoch:4 Step:816 Training_loss:0.601212, Acc_avg:68.00% Training_loss_avg:0.583020
Epoch:4 Step:824 Training_loss:0.639978, Acc_avg:67.25% Training_loss_avg:0.585080
Epoch:4 Step:832 Training_loss:0.524360, Acc_avg:67.00% Training_loss_avg:0.587771
Epoch:4 Step:840 Training_loss:0

52it [00:07,  6.62it/s]


Epoch:4 Step:992 Val_loss:0.636689, Val_Acc_avg:64.75%
Epoch:4 Step:1000 Training_loss:0.487033, Acc_avg:68.00% Training_loss_avg:0.567893
Epoch:4 Step:1008 Training_loss:0.761631, Acc_avg:68.00% Training_loss_avg:0.569033
Epoch:4 Step:1016 Training_loss:0.324146, Acc_avg:68.75% Training_loss_avg:0.562348
Epoch:4 Step:1024 Training_loss:0.475505, Acc_avg:68.75% Training_loss_avg:0.563141
Epoch:4 Step:1032 Training_loss:0.480113, Acc_avg:68.25% Training_loss_avg:0.565841
Epoch:4 Step:1040 Training_loss:0.445382, Acc_avg:68.50% Training_loss_avg:0.563506
Epoch:4 Step:1048 Training_loss:0.349102, Acc_avg:69.00% Training_loss_avg:0.553758
Epoch:4 Step:1056 Training_loss:0.627021, Acc_avg:69.75% Training_loss_avg:0.549549
Epoch:4 Step:1064 Training_loss:0.408457, Acc_avg:69.50% Training_loss_avg:0.548344
Epoch:4 Step:1072 Training_loss:0.761232, Acc_avg:69.00% Training_loss_avg:0.553902
Epoch:4 Step:1080 Training_loss:0.640222, Acc_avg:69.50% Training_loss_avg:0.553652
Epoch:4 Step:1088 Tra

52it [00:07,  6.62it/s]


Epoch:4 Step:1240 Val_loss:0.623070, Val_Acc_avg:62.25%
Epoch:4 Step:1248 Training_loss:0.558872, Acc_avg:69.25% Training_loss_avg:0.567868
Epoch:4 Step:1256 Training_loss:0.625464, Acc_avg:69.00% Training_loss_avg:0.567979
Epoch:4 Step:1264 Training_loss:0.647840, Acc_avg:69.00% Training_loss_avg:0.569221
Epoch:4 Step:1272 Training_loss:0.714827, Acc_avg:69.00% Training_loss_avg:0.570894
Epoch:4 Step:1280 Training_loss:0.557060, Acc_avg:68.75% Training_loss_avg:0.574093
Epoch:4 Step:1288 Training_loss:0.733402, Acc_avg:68.50% Training_loss_avg:0.575059
Epoch:4 Step:1296 Training_loss:0.905008, Acc_avg:68.50% Training_loss_avg:0.578736
Epoch:4 Step:1304 Training_loss:0.522723, Acc_avg:68.25% Training_loss_avg:0.583034
Epoch:4 Step:1312 Training_loss:0.606975, Acc_avg:68.00% Training_loss_avg:0.582160
Epoch:4 Step:1320 Training_loss:0.751678, Acc_avg:67.00% Training_loss_avg:0.586517
Epoch:4 Step:1328 Training_loss:0.585702, Acc_avg:67.00% Training_loss_avg:0.588433
Epoch:4 Step:1336 Tr

52it [00:07,  6.61it/s]


Epoch:4 Step:1488 Val_loss:0.628077, Val_Acc_avg:60.50%
Epoch:4 Step:1496 Training_loss:0.495105, Acc_avg:63.00% Training_loss_avg:0.601472
Epoch:4 Step:1504 Training_loss:0.612248, Acc_avg:63.00% Training_loss_avg:0.603917
Epoch:4 Step:1512 Training_loss:0.528331, Acc_avg:63.25% Training_loss_avg:0.602750
Epoch:4 Step:1520 Training_loss:0.670523, Acc_avg:63.00% Training_loss_avg:0.600388
Epoch:4 Step:1528 Training_loss:0.557891, Acc_avg:63.25% Training_loss_avg:0.597860
Epoch:4 Step:1536 Training_loss:0.529218, Acc_avg:63.25% Training_loss_avg:0.598840
Epoch:4 Step:1544 Training_loss:0.699483, Acc_avg:62.50% Training_loss_avg:0.601903
Epoch:4 Step:1552 Training_loss:0.567515, Acc_avg:62.00% Training_loss_avg:0.603713
Epoch:4 Step:1560 Training_loss:0.752561, Acc_avg:61.50% Training_loss_avg:0.611975
Epoch:4 Step:1568 Training_loss:0.639039, Acc_avg:60.75% Training_loss_avg:0.615987
Epoch:4 Step:1576 Training_loss:0.455549, Acc_avg:61.50% Training_loss_avg:0.610081
Epoch:4 Step:1584 Tr

52it [00:07,  6.62it/s]


Epoch:4 Step:1736 Val_loss:0.613826, Val_Acc_avg:61.75%
Epoch:4 Step:1744 Training_loss:0.588740, Acc_avg:61.50% Training_loss_avg:0.602200
Epoch:4 Step:1752 Training_loss:0.558594, Acc_avg:62.00% Training_loss_avg:0.598740
Epoch:4 Step:1760 Training_loss:0.506161, Acc_avg:62.75% Training_loss_avg:0.594360
Epoch:4 Step:1768 Training_loss:0.520262, Acc_avg:63.00% Training_loss_avg:0.594343
Epoch:4 Step:1776 Training_loss:0.666734, Acc_avg:63.00% Training_loss_avg:0.593455
Epoch:4 Step:1784 Training_loss:0.307286, Acc_avg:63.25% Training_loss_avg:0.589866
Epoch:4 Step:1792 Training_loss:0.487751, Acc_avg:63.25% Training_loss_avg:0.591597
Epoch:4 Step:1800 Training_loss:0.688787, Acc_avg:63.50% Training_loss_avg:0.592567
Epoch:4 Step:1808 Training_loss:0.618248, Acc_avg:63.50% Training_loss_avg:0.590416
Epoch:4 Step:1816 Training_loss:0.562422, Acc_avg:64.25% Training_loss_avg:0.586856
Epoch:4 Step:1824 Training_loss:0.482752, Acc_avg:64.00% Training_loss_avg:0.583492
Epoch:4 Step:1832 Tr

52it [00:07,  6.62it/s]


Epoch:4 Step:1984 Val_loss:0.629526, Val_Acc_avg:64.75%
Epoch:4 Step:1992 Training_loss:0.373773, Acc_avg:66.50% Training_loss_avg:0.584129
Epoch:4 Step:2000 Training_loss:0.555970, Acc_avg:66.50% Training_loss_avg:0.583412
Epoch:4 Step:2008 Training_loss:0.620238, Acc_avg:67.25% Training_loss_avg:0.578438
Epoch:4 Step:2016 Training_loss:0.577521, Acc_avg:67.25% Training_loss_avg:0.579131
Epoch:4 Step:2024 Training_loss:0.582737, Acc_avg:67.75% Training_loss_avg:0.577316
Epoch:4 Step:2032 Training_loss:0.887284, Acc_avg:67.50% Training_loss_avg:0.581029
Epoch:4 Step:2040 Training_loss:0.402805, Acc_avg:68.00% Training_loss_avg:0.576511
Epoch:4 Step:2048 Training_loss:0.498354, Acc_avg:68.00% Training_loss_avg:0.577370
Epoch:4 Step:2056 Training_loss:0.253584, Acc_avg:69.00% Training_loss_avg:0.569220
Epoch:4 Step:2064 Training_loss:0.639850, Acc_avg:69.25% Training_loss_avg:0.566683
Epoch:4 Step:2072 Training_loss:0.722888, Acc_avg:68.25% Training_loss_avg:0.572853
Epoch:4 Step:2080 Tr

52it [00:07,  6.62it/s]


Epoch:4 Step:2232 Val_loss:0.604483, Val_Acc_avg:63.50%
Epoch:4 Step:2240 Training_loss:0.613777, Acc_avg:69.00% Training_loss_avg:0.569708
Epoch:4 Step:2248 Training_loss:0.461491, Acc_avg:69.25% Training_loss_avg:0.567021
Epoch:4 Step:2256 Training_loss:0.708992, Acc_avg:69.75% Training_loss_avg:0.566373
Epoch:4 Step:2264 Training_loss:0.387136, Acc_avg:70.00% Training_loss_avg:0.563902
Epoch:4 Step:2272 Training_loss:0.472057, Acc_avg:70.00% Training_loss_avg:0.563558
Epoch:4 Step:2280 Training_loss:0.544062, Acc_avg:70.50% Training_loss_avg:0.559798
Epoch:4 Step:2288 Training_loss:0.553678, Acc_avg:71.00% Training_loss_avg:0.552841
Epoch:4 Step:2296 Training_loss:0.575683, Acc_avg:70.75% Training_loss_avg:0.555330
Epoch:4 Step:2304 Training_loss:0.779561, Acc_avg:70.50% Training_loss_avg:0.559651
Epoch:4 Step:2312 Training_loss:0.565871, Acc_avg:70.50% Training_loss_avg:0.560694
Epoch:4 Step:2320 Training_loss:0.674511, Acc_avg:70.50% Training_loss_avg:0.558653
Epoch:4 Step:2328 Tr

52it [00:07,  6.61it/s]


Epoch:4 Step:2480 Val_loss:0.606368, Val_Acc_avg:64.00%
Epoch:4 Step:2488 Training_loss:0.577913, Acc_avg:64.25% Training_loss_avg:0.588137
Epoch:4 Step:2496 Training_loss:0.599376, Acc_avg:64.25% Training_loss_avg:0.588301
Epoch:4 Step:2504 Training_loss:0.518147, Acc_avg:64.25% Training_loss_avg:0.588606
Epoch:4 Step:2512 Training_loss:0.360588, Acc_avg:64.50% Training_loss_avg:0.585359
Epoch:4 Step:2520 Training_loss:0.643441, Acc_avg:64.75% Training_loss_avg:0.582740
Epoch:4 Step:2528 Training_loss:0.587112, Acc_avg:64.00% Training_loss_avg:0.584684
Epoch:4 Step:2536 Training_loss:0.647556, Acc_avg:63.25% Training_loss_avg:0.589888
Epoch:4 Step:2544 Training_loss:0.359687, Acc_avg:63.00% Training_loss_avg:0.591698
Epoch:4 Step:2552 Training_loss:0.575264, Acc_avg:62.50% Training_loss_avg:0.593907
Epoch:4 Step:2560 Training_loss:0.428746, Acc_avg:63.00% Training_loss_avg:0.593729
Epoch:4 Step:2568 Training_loss:0.561245, Acc_avg:63.00% Training_loss_avg:0.590762
Epoch:4 Step:2576 Tr

52it [00:07,  6.62it/s]


Epoch:4 Step:2728 Val_loss:0.613492, Val_Acc_avg:62.25%
Epoch:4 Step:2736 Training_loss:0.675986, Acc_avg:64.75% Training_loss_avg:0.584266
Epoch:4 Step:2744 Training_loss:0.490294, Acc_avg:64.75% Training_loss_avg:0.583478
Epoch:4 Step:2752 Training_loss:0.333614, Acc_avg:65.25% Training_loss_avg:0.579448
Epoch:4 Step:2760 Training_loss:0.540486, Acc_avg:65.50% Training_loss_avg:0.577009
Epoch:4 Step:2768 Training_loss:0.316952, Acc_avg:66.50% Training_loss_avg:0.571156
Epoch:4 Step:2776 Training_loss:0.438694, Acc_avg:66.50% Training_loss_avg:0.571164
Epoch:4 Step:2784 Training_loss:0.465745, Acc_avg:66.50% Training_loss_avg:0.571224
Epoch:4 Step:2792 Training_loss:0.420821, Acc_avg:67.25% Training_loss_avg:0.564551
Epoch:4 Step:2800 Training_loss:0.633117, Acc_avg:67.75% Training_loss_avg:0.563012
Epoch:4 Step:2808 Training_loss:0.894070, Acc_avg:66.75% Training_loss_avg:0.570994
Epoch:4 Step:2816 Training_loss:0.432145, Acc_avg:67.50% Training_loss_avg:0.566419
Epoch:4 Step:2824 Tr

52it [00:07,  6.61it/s]


Epoch:4 Step:2976 Val_loss:0.627388, Val_Acc_avg:61.75%
Epoch:4 Step:2984 Training_loss:0.679367, Acc_avg:70.50% Training_loss_avg:0.575101
Epoch:4 Step:2992 Training_loss:0.418711, Acc_avg:70.50% Training_loss_avg:0.570775
Epoch:4 Step:3000 Training_loss:0.326063, Acc_avg:71.75% Training_loss_avg:0.564331
Epoch:4 Step:3008 Training_loss:0.496098, Acc_avg:71.75% Training_loss_avg:0.563876
Epoch:4 Step:3016 Training_loss:0.577549, Acc_avg:72.00% Training_loss_avg:0.565962
Epoch:4 Step:3024 Training_loss:0.836364, Acc_avg:71.50% Training_loss_avg:0.572310
Epoch:4 Step:3032 Training_loss:0.520748, Acc_avg:72.25% Training_loss_avg:0.568741
Epoch:4 Step:3040 Training_loss:0.412200, Acc_avg:72.25% Training_loss_avg:0.566795
Epoch:4 Step:3048 Training_loss:0.821325, Acc_avg:71.50% Training_loss_avg:0.571175
Epoch:4 Step:3056 Training_loss:0.576077, Acc_avg:71.00% Training_loss_avg:0.572529
Epoch:4 Step:3064 Training_loss:0.509429, Acc_avg:71.50% Training_loss_avg:0.571286
Epoch:4 Step:3072 Tr

52it [00:07,  6.62it/s]


Epoch:4 Step:3224 Val_loss:0.642445, Val_Acc_avg:60.00%
Epoch:4 Step:3232 Training_loss:0.868251, Acc_avg:64.75% Training_loss_avg:0.619867
Epoch:4 Step:3240 Training_loss:0.633377, Acc_avg:65.00% Training_loss_avg:0.617536
Epoch:4 Step:3248 Training_loss:0.658400, Acc_avg:64.75% Training_loss_avg:0.618990
Epoch:4 Step:3256 Training_loss:0.483797, Acc_avg:64.75% Training_loss_avg:0.611503
Epoch:4 Step:3264 Training_loss:0.455788, Acc_avg:65.50% Training_loss_avg:0.610233
Epoch:4 Step:3272 Training_loss:0.694596, Acc_avg:64.75% Training_loss_avg:0.616390
Epoch:4 Step:3280 Training_loss:0.668522, Acc_avg:64.50% Training_loss_avg:0.619533
Epoch:4 Step:3288 Training_loss:0.491824, Acc_avg:65.00% Training_loss_avg:0.615889
Epoch:4 Step:3296 Training_loss:0.522814, Acc_avg:65.00% Training_loss_avg:0.613904
Epoch:4 Step:3304 Training_loss:0.501287, Acc_avg:65.75% Training_loss_avg:0.611481
Epoch:4 Step:3312 Training_loss:0.601969, Acc_avg:65.25% Training_loss_avg:0.614005
Epoch:4 Step:3320 Tr

In [119]:
class BERT_Arch(nn.Module):

    def __init__(self, bert):
      
      super(BERT_Arch, self).__init__()

      self.bert = bert 
      
      # dropout layer
      self.dropout = nn.Dropout(0.1)
      
      # relu activation function
      self.relu =  nn.ReLU()

      # dense layer 1
      self.fc1 = nn.Linear(512,512)
      
      # dense layer 2 (Output layer)
      self.fc2 = nn.Linear(512,2)

      #softmax activation function
      self.softmax = nn.LogSoftmax(dim=1)

    #define the forward pass
    def forward(self, sent_id, mask):

      #pass the inputs to the model  
      cls_hs = self.bert(sent_id, attention_mask=mask)

      x = self.fc1(cls_hs.logits)

      x = self.relu(x)

      x = self.dropout(x)

      # output layer
      x = self.fc2(x)
      
      # apply softmax activation
      x = self.softmax(x)

      return x

In [150]:
"""
Main configuration function for a given finetune run
:return: None
"""
run_name = "lr_5e-5 redo, 512 split with attention! custom model redo 3"
run_dir = "codebert_finetune_runs/{}".format(run_name)
model_name = 'codebert-base'
checkpoint_location = None
online = False
load_splits = False
save_data = True

print("generating data splits")

code_df = preprocess_data(file_loc='code_dataset.jsonl')
train_data, val_data, test_data = tokenize(code_df, model_name=model_name)

X_train, A_train, Y_train = train_data
X_val, A_val, Y_val = val_data
X_test, A_test, Y_test = test_data

data_type = ['train', 'val', 'test']
data_split_type = ['X', 'A', 'Y']


# Creating dir to save logs and checkpoints, re
dir_name = "{}".format(run_dir)
if os.path.exists(dir_name):
  input("run name already exists, press Enter to overwrite")
else:
  os.makedirs(dir_name)

if save_data:
  print("saving data splits")

  data_all = [train_data, val_data, test_data]
  for i, data in enumerate(data_all):
    for j, split in enumerate(data):
      with open('{}/{}_{}.pickle'.format(run_dir,data_type[i], data_split_type[j]), 'wb') as handle:
        pickle.dump(split, handle)

# Loading model from checkpoint if location provided
if online:
  print("loading model from online")
  model = AutoModelForSequenceClassification.from_pretrained("microsoft/codebert-base")
elif checkpoint_location is None:
  print("loading model from local repo")

  model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=512)
else:
  print("loading model from checkpoint: {}".format(checkpoint_location))
  model = AutoModelForSequenceClassification.from_pretrained(checkpoint_location)






generating data splits
Insecure code counts: 3729, Total code counts: 8000, Proportion 0.466125
Data points: 8000




saving data splits
loading model from local repo


Some weights of the model checkpoint at codebert-base were not used when initializing RobertaForSequenceClassification: ['pooler.dense.weight', 'pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at codebert-base and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.weight', 'classifier.out_proj.weight', 'classifier.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for p

In [151]:
# freeze all the parameters
for param in model.parameters():
    param.requires_grad = False

custom_model = BERT_Arch(model)

In [152]:
custom_model = custom_model.to(device)

In [153]:

def train_custom(model, train_data, val_data, epochs=5, batch_size=16, learning_rate=2e-5, validate_per=500,
          run_name="temp", run_descrption=None):
    """
    Main fine-tuning training loop for the provided model

    :param model: model loaded with predefined weights
    :param train_data: tuple of X_train, A_train, Y_train (X = inputs, A = attention, Y = target)
    :param val_data: tuple X_val, A_val, Y_val
    :param epochs: number of epochs for training
    :param batch_size: batch size (see note below about batch_hack)
    :param learning_rate: optimizer learning rate
    :param validate_per: number of weight updates before validation occurs
                            (notes: - if batch_size = 32, and validate_per = 32, validation will occur every batch
                                    - this is wrt the start of each epoch
                                    - validation will always occour at the start of each epoch (step 0))
    :param run_name: name used to saving checkpoints and log files within codebert_finetune_runs
    :param run_descrption: string that is saved to info.txt describing the run


    :return: None (models are saved in checkpoints along with log data)
    """


    # Saving run description.txt
    if run_descrption is not None:
        with open("{}/info.txt".format(dir_name), "a+") as f:
            f.write(run_descrption)

    # Unpacking data
    X_train, A_train, Y_train = train_data
    X_val, A_val, Y_val = val_data


    batch_hack = batch_size  # See note below regarding limited GPU memory

    # Initializing arrays for tracking loss
    train_loss_hist = []
    val_loss_hist = []
    train_pred_hist = []
    # Counter to track batches (see note below related to GPU memory)
    batch_count = 0
    # validate_per_batch = int(validate_per/batch_hack)

    # Moving model to GPU if configured
    model = model.to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = AdamW(model.parameters(), lr=learning_rate)

    validate_per = int(validate_per/batch_size)

    cross_entropy  = nn.NLLLoss() 
    for epoch in range(epochs):

        # Generating random index for manual shuffling of data each epoch as note using DataLoaders
        permutation = torch.randperm(X_train.shape[0])

        # Note here that only a single element is loaded at each iteration (batch size = 1) due to GPU memory constraint
        for batch_id, i in enumerate(range(0, X_train.shape[0], batch_hack)):

            # Loading batch and moving to device
            indices = permutation[i:i + batch_hack]
            batch_X, batch_Y, batch_A = X_train[indices].to(device), Y_train[indices].to(device), A_train[indices].to(device)


            batch_Y_one_hot = torch.nn.functional.one_hot(batch_Y, num_classes= 2)

            model.train()

            # Forward pass
            #outputs = model(batch_X,labels=batch_Y, attention_mask=batch_A)
            outputs = model(batch_X, batch_A)

            #loss = criterion(loss_clsf.float(), batch_Y_one_hot.float())
            #loss = outputs.loss

            loss = cross_entropy(outputs, batch_Y)


            # Clip params
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)


            # Backward pass
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()

            loss_clsf = nn.Softmax(dim=1)(outputs)
            acc = np.average(torch.eq(batch_Y.cpu(), loss_clsf.argmax(axis=1).cpu()))
            #rint(correct)

            # Tracking loss
            train_loss_hist.append(float(loss.item()))
            train_pred_hist.append(acc)

            # Training output
            train_output = "Epoch:{} Step:{} Training_loss:{:.6f}, Acc_avg:{:.2f}%".format(epoch, i, loss.item(), np.sum(100*train_pred_hist[-50:])/min(len(train_pred_hist), 50))
            print(train_output+" Training_loss_avg:{:.6f}".format(np.average(train_loss_hist[-50:])))
            with open("{}/train_loss.txt".format(dir_name), "a+") as f:
                f.write(train_output+"\n")

            # Validation
            if batch_id % validate_per == 0:
                val_loss_total = 0
                model.eval()
                print("Validating:")
                val_acc = []
                for val_badtch_id, j in tqdm(enumerate(range(0, X_val.shape[0], batch_hack))):
                    # Loading singular validation data (overwrites train data as can only load 1 intp GPU)
                    batch_X, batch_Y, batch_A = X_val[j:j+batch_hack].to(device), Y_val[j:j+batch_hack].to(device), A_val[j:j+batch_hack].to(device)

                    with torch.no_grad():
                        outputs = model(batch_X, batch_A)

                    val_loss = cross_entropy(outputs, batch_Y)
                    val_loss_total += float(val_loss)

                    
                    val_clsf = nn.Softmax(dim=1)(outputs)
                    val_acc.append(np.average(torch.eq(batch_Y.cpu(), val_clsf.argmax(axis=1).cpu())))

                    del batch_X
                    del batch_Y

                # Adding average loss to tracker
                val_average = val_loss_total / (val_badtch_id+1)
                val_loss_hist.append(val_average)

                # Validation output and logging
                val_output = "Epoch:{} Step:{} Val_loss:{:.6f}, Val_Acc_avg:{:.2f}%".format(epoch, i, val_average, np.sum(100*val_acc[-50:])/min(len(val_acc), 50))
                print(val_output)
                with open("{}/val_los.txt".format(dir_name), "a+") as f:
                    f.write(val_output+"\n")

        # End of epoch checkpoint
        #model.save_pretrained("{}/epoch_{}".format(dir_name, epoch + 1))
        torch.save(model, "{}/epoch_{}".format(dir_name, epoch + 1))


In [None]:
train_custom(model=custom_model,
      train_data=train_data,
      val_data=val_data,
      epochs=10,
      batch_size=8,
      learning_rate=5e-5,
      validate_per=250,
      run_name=run_name,
      run_descrption="lr_5e-5 redo")

# New Section

# New Section