In [1]:
# %matplotlib ipympl

import os
import glob
import torch
import tqdm
import time
import csv
from torch import nn
from torch.utils.data import DataLoader, ConcatDataset, random_split
# from learning.model.nodevalue import DWNodeValueModel
from learning.data.process_utils import move_to_device
from learning.data.nv_utils import (
    extract_input, extract_output_target, collate_nv_data
)
from learning.data.nv_dataset import NodeValueDataset
from learning.model.nodevalue import NodeValueModel

In [2]:
# Collect every recorded game history and expose them as one dataset.
history_folder = "server/history/"
history_files = sorted(glob.glob(os.path.join(history_folder, "history_*.json")))
if not history_files:
    # Fail here with context rather than deeper inside ConcatDataset/random_split.
    raise FileNotFoundError(
        f"No history_*.json files found in {history_folder!r}"
    )

datasets = [NodeValueDataset(f) for f in history_files]
concatenated_dataset = ConcatDataset(datasets)

# Seed the split explicitly. Without a generator the split depends on the global RNG
# state at the time this cell runs, so every kernel restart would produce a different
# train/val/test membership; when resuming from a checkpoint that lets samples the
# model was trained on end up in the validation set.
# The split is only reproducible as long as the set of history files is unchanged.
split_seed = 42
train_ds, val_ds, test_ds = random_split(
    concatenated_dataset,
    [0.8, 0.1, 0.1],
    generator=torch.Generator().manual_seed(split_seed),
)

In [3]:
# Reproducibility: seed the global RNG, then a dedicated generator that both loaders
# share for shuffling.
seed = 42
torch.manual_seed(seed)
generator = torch.Generator().manual_seed(seed)

# Options common to the training and validation loaders.
loader_options = dict(
    pin_memory=True,
    shuffle=True,
    collate_fn=collate_nv_data,
    generator=generator,
)
train_loader = DataLoader(train_ds, batch_size=10000, **loader_options)
val_loader = DataLoader(val_ds, batch_size=20000, **loader_options)

# Report the size of each split.
for split_label, split_ds in (
    ("train_ds", train_ds),
    ("val_ds", val_ds),
    ("test_ds", test_ds),
):
    print(split_label, len(split_ds))

train_ds 578341
val_ds 72292
test_ds 72292


In [4]:
# Pull a single batch from the (shuffled) training loader for interactive inspection.
batch = next(iter(train_loader))

In [5]:
# Count how often each value occurs in the batch's "winners" entry (class-balance check).
torch.bincount(batch["winners"])

tensor([1311, 1260, 1218, 1247, 1259, 1198, 1248, 1259])

In [6]:
# Scratch check: indexing a permutation with a "each index twice" list repeats every
# permuted entry twice, in order.
perm = torch.tensor(list(range(7, -1, -1)))
doubled_index = [i for i in range(8) for _ in range(2)]
perm[doubled_index]

tensor([7, 7, 6, 6, 5, 5, 4, 4, 3, 3, 2, 2, 1, 1, 0, 0])

In [7]:
def calculate_mean(train_loader):
    """Compute the per-feature mean of ``batch["nodes_states"]`` over a loader.

    Each batch's ``nodes_states`` is reduced over dim 0 and its ``shape[0]`` is
    counted as the number of rows, so batches may differ in size.
    (Assumes ``nodes_states`` is 2-D, rows x features — TODO confirm against the
    collate function.)

    Args:
        train_loader: iterable of dicts that contain a ``"nodes_states"`` tensor.

    Returns:
        1-D float32 CPU tensor with one mean per feature.

    Raises:
        ValueError: if the loader yields no rows at all.
    """
    count_of_values = 0
    feature_sums = None
    batch_count = 0

    for batch in tqdm.tqdm(train_loader, desc="Calculating average node states"):
        current_states = batch["nodes_states"]

        if feature_sums is None:
            # Accumulate in float64: summing hundreds of thousands of float32 rows
            # loses precision otherwise.
            feature_sums = torch.zeros(current_states.shape[1], dtype=torch.float64)

        # Sum across the batch dimension; keep the running total on the CPU.
        feature_sums += current_states.sum(dim=0, dtype=torch.float64).cpu()
        count_of_values += current_states.shape[0]
        batch_count += 1

    if count_of_values == 0:
        raise ValueError("calculate_mean: the loader produced no node states")

    # Cast back to float32 so callers get the same dtype as before.
    feature_averages = (feature_sums / count_of_values).to(torch.float32)

    print(f"Statistics calculated over {batch_count} batches")
    print(f"Shape of feature dimension: {feature_sums.shape}")
    print(f"Feature-wise averages:")
    for i, avg in enumerate(feature_averages):
        print(f"  Feature {i}: {avg.item():.6f}")
    return feature_averages

def calculate_std(train_loader, feature_averages):
    """Compute the per-feature population standard deviation of ``nodes_states``.

    Deviations are taken from ``feature_averages`` (broadcast against each batch)
    and divided by the total number of rows (no Bessel correction).
    (Assumes ``nodes_states`` is 2-D, rows x features — TODO confirm against the
    collate function.)

    Args:
        train_loader: iterable of dicts that contain a ``"nodes_states"`` tensor.
        feature_averages: per-feature means, e.g. the result of ``calculate_mean``.

    Returns:
        1-D float32 CPU tensor with one standard deviation per feature.

    Raises:
        ValueError: if the loader yields no rows at all.
    """
    count_of_values = 0
    feature_sum_squared_diff = None
    batch_count = 0

    for batch in tqdm.tqdm(train_loader, desc="Calculating std dev of node states"):
        current_states = batch["nodes_states"]

        # Compute the squared deviations once and reuse them.
        squared_diff = (current_states - feature_averages) ** 2

        if feature_sum_squared_diff is None:
            # float64 accumulator avoids precision loss over many batches.
            feature_sum_squared_diff = torch.zeros(
                current_states.shape[1], dtype=torch.float64
            )

        feature_sum_squared_diff += squared_diff.sum(dim=0, dtype=torch.float64).cpu()
        count_of_values += current_states.shape[0]
        batch_count += 1

    if count_of_values == 0:
        raise ValueError("calculate_std: the loader produced no node states")

    feature_stds = torch.sqrt(feature_sum_squared_diff / count_of_values).to(torch.float32)

    # The overall figure pools every element, so it must be normalised by
    # rows * features, not by the row count alone.
    element_count = count_of_values * feature_sum_squared_diff.numel()
    overall_std = torch.sqrt(feature_sum_squared_diff.sum() / element_count)

    print(f"Standard deviation statistics calculated over {batch_count} batches")
    print(f"Overall standard deviation: {overall_std.item():.6f}")
    print(f"Feature-wise standard deviations:")
    for i, std in enumerate(feature_stds):
        print(f"  Feature {i}: {std.item():.6f}")
    return feature_stds

In [8]:
# Resume state; stays at these defaults when no checkpoint is found.
model_state = None
optim_state = None
latest_epoch = -1

with_dice_scatter = True

model_name = "nodevalue" + ("_no_dice" if not with_dice_scatter else "")
models_dir = f"learning/{model_name}_checkpoints"

os.makedirs(models_dir, exist_ok=True)

# Map epoch number -> checkpoint path. Files whose trailing "_<suffix>" is not a plain
# number are skipped instead of crashing the cell with a ValueError, and the path that
# was actually found on disk is used rather than one rebuilt from the epoch number.
checkpoint_files = glob.glob(os.path.join(models_dir, f"{model_name}_*.pt"))
checkpoint_by_epoch = {}
for checkpoint_path in checkpoint_files:
    stem = os.path.splitext(os.path.basename(checkpoint_path))[0]
    epoch_suffix = stem.rsplit("_", 1)[-1]
    if epoch_suffix.isascii() and epoch_suffix.isdigit():
        checkpoint_by_epoch[int(epoch_suffix)] = checkpoint_path

if checkpoint_by_epoch:
    latest_epoch = max(checkpoint_by_epoch)
    latest_checkpoint = checkpoint_by_epoch[latest_epoch]
    print(f"Loading latest checkpoint: {latest_checkpoint} (epoch {latest_epoch})")
    # Load onto the CPU; the model is moved to its device later.
    checkpoint = torch.load(latest_checkpoint, map_location=torch.device('cpu'))
    model_state = checkpoint["model_state"]
    optim_state = checkpoint["optim_state"]
else:
    print("No valid checkpoint files found")

# Training continues with the epoch after the last checkpointed one (0 when starting fresh).
start_epoch = latest_epoch + 1

Loading latest checkpoint: learning/nodevalue_checkpoints/nodevalue_000699.pt (epoch 699)


In [9]:
# Build the node-value network and report how many of its parameters are trainable.
node_value_model = NodeValueModel(with_dice_scatter=with_dice_scatter)
trainable_parameters = [p for p in node_value_model.parameters() if p.requires_grad]
num_params = sum(p.numel() for p in trainable_parameters)
print(f"Model created. Total trainable parameters: {num_params:,}")

Model created. Total trainable parameters: 4,244


In [None]:
# Load the checkpointed weights (if any) before the optimizer is built: layers frozen
# in this cell are then excluded from training, because the optimizer is only given
# parameters that still have requires_grad=True.
if model_state is not None:
    node_value_model.load_state_dict(model_state)
    print(f"Model state loaded.")

# Optional experiments (disabled): freeze selected GAT layers / passthrough coefficients.
# node_value_model.gat_layers[0].requires_grad_(False)
# node_value_model.gat_layers[1].passthrough_coef.requires_grad_(False)
# node_value_model.gat_layers[2].requires_grad_(False)

# Optional experiments (disabled): re-initialise a layer and reset its passthrough coefficient.
# node_value_model.gat_layers[1].reset_parameters() 
# nn.init.constant_(node_value_model.gat_layers[1].passthrough_coef, -2)

# node_value_model.gat_layers[2].reset_parameters() 
# nn.init.constant_(node_value_model.gat_layers[2].passthrough_coef, -1)

Model state loaded.


Parameter containing:
tensor(-2., requires_grad=True)

In [None]:
# ── CSV set-up: log one row per epoch ───────────────────────────────────────
csv_path = f"{model_name}_epoch_metrics.csv"

# Re-running this cell would otherwise leak the handle opened by the previous run.
_stale_csv_file = globals().get("csv_file")
if _stale_csv_file is not None and not _stale_csv_file.closed:
    _stale_csv_file.close()

# An empty file left over from an aborted run is treated as "no log yet".
epoch_log_file_exists = os.path.isfile(csv_path)
if epoch_log_file_exists and os.path.getsize(csv_path) == 0:
    os.remove(csv_path)
    epoch_log_file_exists = False

csv_file = open(csv_path, "a", newline="")  # append so earlier epochs are kept
csv_writer = csv.writer(csv_file)

# Only write the header when starting a new log file.
if not epoch_log_file_exists:
    csv_writer.writerow([
        "epoch",           # 0-based epoch index
        "train_loss",      # average training loss for the epoch
        "val_loss",        # average validation loss for the epoch
        "train_time_sec",  # seconds spent in training phase
        "val_time_sec",    # seconds spent in validation phase
        "total_time_sec"   # train + val
    ])

# ── model / optimizer prep ─────────────────────────────────────────────────

n_epochs   = 10000
# Prefer the first GPU, but fall back to the CPU so the notebook still runs without CUDA.
device     = torch.device("cuda", 0) if torch.cuda.is_available() else torch.device("cpu")
train_time_limit = 10 * 60              # seconds allowed for one training pass
val_time_limit = train_time_limit / 5   # seconds allowed for one validation pass
criterion  = nn.CrossEntropyLoss()

node_value_model = node_value_model.to(device)

# Frozen parameters (requires_grad=False) are deliberately left out of the optimizer.
optimizer  = torch.optim.Adam(
    [p for p in node_value_model.parameters() if p.requires_grad],
    lr=1e-2
)
# With reset_optimizer=True a resumed run starts from fresh optimizer state and lr=1e-2;
# set it to False to continue from the checkpointed optimizer state instead.
reset_optimizer = True
if not reset_optimizer and optim_state is not None:
    optimizer.load_state_dict(optim_state)

# Cut the learning rate to a quarter after 10 epochs without validation improvement.
scheduler  = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, mode="min",
    factor=0.25,
    patience=10,
    threshold=0.0001,
    min_lr=1e-8
)


In [12]:
# ── INITIAL LOSS EVALUATION (before any training in this run) ────────────────
def _average_loss_no_grad(loader, desc, phase_start, time_limit, stop_message):
    """Average the per-batch criterion loss over ``loader`` without gradients.

    Iteration stops early (after printing ``stop_message``) once more than
    ``time_limit`` seconds have elapsed since ``phase_start``.

    Raises:
        RuntimeError: if the loader yields no batches, so no average exists.
    """
    loss_sum, n_batches = 0.0, 0
    with torch.no_grad():
        for eval_batch in tqdm.tqdm(loader, desc=desc):
            eval_batch = move_to_device(eval_batch, device)
            eval_out = node_value_model(
                *extract_input(eval_batch, with_dice_scatter=with_dice_scatter)
            )
            eval_outputs, eval_targets = extract_output_target(eval_batch, eval_out)
            loss_sum += criterion(eval_outputs, eval_targets).item()
            n_batches += 1
            if (time.time() - phase_start) > time_limit:
                print(stop_message)
                break
    if n_batches == 0:
        raise RuntimeError(f"{desc}: loader produced no batches, cannot average the loss")
    return loss_sum / n_batches


# Only evaluated when a new metrics file is being started, so the baseline row is
# written exactly once per log.
if not epoch_log_file_exists:
    node_value_model.eval()
    t_start = time.time()

    # avg loss on training set *without* gradient tracking
    init_train_loss = _average_loss_no_grad(
        train_loader,
        "init‑train",
        t_start,
        train_time_limit,
        f"Stopping training after >{train_time_limit} seconds.",
    )
    t_train_done = time.time()

    # avg loss on validation set
    init_val_loss = _average_loss_no_grad(
        val_loader,
        "init‑val",
        t_train_done,
        val_time_limit,
        f"Stopping validation after >{val_time_limit} seconds.",
    )
    t_val_done = time.time()

    # times
    init_train_time = t_train_done - t_start
    init_val_time   = t_val_done - t_train_done
    init_total_time = t_val_done - t_start

    # write the baseline row, using -1 as its epoch index
    csv_writer.writerow([
        -1,
        init_train_loss,
        init_val_loss,
        init_train_time,
        init_val_time,
        init_total_time
    ])
    csv_file.flush()
    print(
        f"Initial (-1) | "
        f"Train {init_train_loss:.4f} | "
        f"Val {init_val_loss:.4f} | "
        f"Time {init_total_time:.1f}s (T {init_train_time:.1f}s | "
        f"V {init_val_time:.1f}s)"
    )

In [13]:
# Display the learning rate(s) the scheduler currently reports, as a check before training.
scheduler.get_last_lr()

[0.01]

In [14]:
# Train/validate for each remaining epoch; every epoch writes a checkpoint and one CSV row.
if csv_file.closed:
    # Fail fast instead of training a whole epoch and then crashing on the first CSV write.
    raise RuntimeError("csv_file is closed; re-run the CSV set-up cell before training")

try:
    for epoch in range(start_epoch, n_epochs):
        t_start = time.time()

        # ── TRAINING -----------------------------------------------------------
        node_value_model.train()
        # Accumulate on the device so the running sum itself needs no host transfer.
        sum_train_loss = torch.tensor(0.0, device=device)
        n_train_batches = 0

        t_batch_tqdm = tqdm.tqdm(
            train_loader,
            desc=f"train {epoch}"
        )
        for t_batch in t_batch_tqdm:
            t_batch = move_to_device(t_batch, device)

            optimizer.zero_grad()

            model_out = node_value_model(*extract_input(t_batch, with_dice_scatter=with_dice_scatter))
            outputs, targets = extract_output_target(t_batch, model_out)
            loss = criterion(outputs, targets)

            # Check BEFORE backward/step: stepping on a NaN/Inf loss would write
            # non-finite values into the weights and optimizer state before the abort.
            if torch.isnan(loss) or torch.isinf(loss):
                raise ValueError(f"Loss contains NaN or Inf values")

            loss.backward()
            optimizer.step()

            sum_train_loss += loss.detach()
            n_train_batches += 1
            t_batch_tqdm.set_postfix(av_loss=sum_train_loss.item()/n_train_batches)
            # Time budget is measured from the start of the epoch.
            if (time.time() - t_start) > train_time_limit:
                print(f"Stopping training after >{train_time_limit} seconds.")
                break

        t_batch_tqdm.close()

        if n_train_batches == 0:
            raise RuntimeError("train_loader produced no batches")
        avg_train_loss = sum_train_loss.item() / n_train_batches
        t_train_done = time.time()

        # ── VALIDATION ---------------------------------------------------------
        node_value_model.eval()
        # Tensor accumulator, matching the training phase (a float here has no .item()).
        sum_val_loss = torch.tensor(0.0, device=device)
        n_val_batches = 0
        with torch.no_grad():
            v_batch_tqdm = tqdm.tqdm(
                val_loader,
                desc=f"val {epoch}"
            )
            for v_batch in v_batch_tqdm:
                v_batch = move_to_device(v_batch, device)

                v_out = node_value_model(*extract_input(v_batch, with_dice_scatter=with_dice_scatter))
                v_outputs, v_targets = extract_output_target(v_batch, v_out)
                val_loss = criterion(v_outputs, v_targets)

                if torch.isnan(val_loss) or torch.isinf(val_loss):
                    raise ValueError(f"Loss contains NaN or Inf values")

                sum_val_loss += val_loss
                n_val_batches += 1
                v_batch_tqdm.set_postfix(av_loss=sum_val_loss.item()/n_val_batches)
                # Validation budget is measured from the end of the training phase.
                if (time.time() - t_train_done) > val_time_limit:
                    print(f"Stopping validation after >{val_time_limit} seconds.")
                    break

            v_batch_tqdm.close()

        if n_val_batches == 0:
            raise RuntimeError("val_loader produced no batches")
        avg_val_loss = sum_val_loss.item() / n_val_batches
        t_val_done = time.time()

        # Step the learning rate scheduler after each epoch
        scheduler.step(avg_val_loss)

        # ── CSV logging ---------------------------------------------------------
        train_time = t_train_done - t_start
        val_time   = t_val_done   - t_train_done
        total_time = t_val_done   - t_start

        # Save the checkpoint before logging, so every logged epoch has a checkpoint.
        checkpoint_data = {
            "epoch": epoch,
            "optim_state": optimizer.state_dict(),
            "model_state": node_value_model.state_dict()
        }
        torch.save(checkpoint_data, os.path.join(models_dir, f"{model_name}_{epoch:06}.pt"))

        csv_writer.writerow([
            epoch,
            avg_train_loss,
            avg_val_loss,
            train_time,
            val_time,
            total_time
        ])
        csv_file.flush()  # ensure data is written even if run aborts

        # ── console printout ----------------------------------------------------
        print(
            f"Epoch {epoch} | "
            f"Train {avg_train_loss:.4f} | "
            f"Val {avg_val_loss:.4f} | "
            f"LR {scheduler.get_last_lr()[0]:.6f}"
        )
finally:
    # ── tidy-up: close the log even when training is interrupted or raises ----
    csv_file.close()

train 700:   0%|          | 0/58 [00:00<?, ?it/s]

  max_per_index.index_reduce_(
train 700: 100%|██████████| 58/58 [00:59<00:00,  1.02s/it, av_loss=1.04]
val 700: 100%|██████████| 4/4 [00:06<00:00,  1.71s/it, av_loss=0.871]


Epoch 700 | Train 1.0426 | Val 0.8709 | LR 0.010000


train 701: 100%|██████████| 58/58 [01:00<00:00,  1.04s/it, av_loss=0.851]
val 701: 100%|██████████| 4/4 [00:06<00:00,  1.66s/it, av_loss=0.752]


Epoch 701 | Train 0.8508 | Val 0.7518 | LR 0.010000


train 702: 100%|██████████| 58/58 [01:00<00:00,  1.05s/it, av_loss=0.778]
val 702: 100%|██████████| 4/4 [00:06<00:00,  1.70s/it, av_loss=0.675]


Epoch 702 | Train 0.7780 | Val 0.6754 | LR 0.010000


train 703: 100%|██████████| 58/58 [01:01<00:00,  1.06s/it, av_loss=0.743]
val 703: 100%|██████████| 4/4 [00:06<00:00,  1.71s/it, av_loss=0.644]


Epoch 703 | Train 0.7430 | Val 0.6443 | LR 0.010000


train 704: 100%|██████████| 58/58 [01:02<00:00,  1.08s/it, av_loss=0.729]
val 704: 100%|██████████| 4/4 [00:06<00:00,  1.73s/it, av_loss=0.632]


Epoch 704 | Train 0.7294 | Val 0.6317 | LR 0.010000


train 705: 100%|██████████| 58/58 [01:02<00:00,  1.08s/it, av_loss=0.722]
val 705: 100%|██████████| 4/4 [00:06<00:00,  1.70s/it, av_loss=0.623]


Epoch 705 | Train 0.7221 | Val 0.6232 | LR 0.010000


train 706: 100%|██████████| 58/58 [01:01<00:00,  1.07s/it, av_loss=0.716]
val 706: 100%|██████████| 4/4 [00:06<00:00,  1.75s/it, av_loss=0.62] 


Epoch 706 | Train 0.7160 | Val 0.6196 | LR 0.010000


train 707: 100%|██████████| 58/58 [01:02<00:00,  1.08s/it, av_loss=0.714]
val 707: 100%|██████████| 4/4 [00:06<00:00,  1.69s/it, av_loss=0.613]


Epoch 707 | Train 0.7136 | Val 0.6125 | LR 0.010000


train 708: 100%|██████████| 58/58 [01:02<00:00,  1.08s/it, av_loss=0.709]
val 708: 100%|██████████| 4/4 [00:07<00:00,  1.79s/it, av_loss=0.607]


Epoch 708 | Train 0.7091 | Val 0.6071 | LR 0.010000


train 709: 100%|██████████| 58/58 [01:03<00:00,  1.09s/it, av_loss=0.706]
val 709: 100%|██████████| 4/4 [00:06<00:00,  1.74s/it, av_loss=0.61] 


Epoch 709 | Train 0.7059 | Val 0.6097 | LR 0.010000


train 710: 100%|██████████| 58/58 [01:03<00:00,  1.10s/it, av_loss=0.704]
val 710: 100%|██████████| 4/4 [00:07<00:00,  1.77s/it, av_loss=0.608]


Epoch 710 | Train 0.7036 | Val 0.6085 | LR 0.010000


train 711: 100%|██████████| 58/58 [01:04<00:00,  1.10s/it, av_loss=0.703]
val 711: 100%|██████████| 4/4 [00:07<00:00,  1.76s/it, av_loss=0.612]


Epoch 711 | Train 0.7027 | Val 0.6116 | LR 0.010000


train 712: 100%|██████████| 58/58 [01:02<00:00,  1.08s/it, av_loss=0.7]  
val 712: 100%|██████████| 4/4 [00:06<00:00,  1.70s/it, av_loss=0.6]  


Epoch 712 | Train 0.7004 | Val 0.5999 | LR 0.010000


train 713: 100%|██████████| 58/58 [01:02<00:00,  1.08s/it, av_loss=0.701]
val 713: 100%|██████████| 4/4 [00:07<00:00,  1.76s/it, av_loss=0.603]


Epoch 713 | Train 0.7014 | Val 0.6029 | LR 0.010000


train 714: 100%|██████████| 58/58 [01:02<00:00,  1.08s/it, av_loss=0.699]
val 714: 100%|██████████| 4/4 [00:06<00:00,  1.69s/it, av_loss=0.6]  


Epoch 714 | Train 0.6993 | Val 0.5996 | LR 0.010000


train 715: 100%|██████████| 58/58 [01:03<00:00,  1.10s/it, av_loss=0.697]
val 715: 100%|██████████| 4/4 [00:06<00:00,  1.70s/it, av_loss=0.601]


Epoch 715 | Train 0.6971 | Val 0.6012 | LR 0.010000


train 716: 100%|██████████| 58/58 [01:02<00:00,  1.07s/it, av_loss=0.695]
val 716: 100%|██████████| 4/4 [00:06<00:00,  1.70s/it, av_loss=0.597]


Epoch 716 | Train 0.6953 | Val 0.5969 | LR 0.010000


train 717: 100%|██████████| 58/58 [01:04<00:00,  1.10s/it, av_loss=0.696]
val 717: 100%|██████████| 4/4 [00:06<00:00,  1.70s/it, av_loss=0.607]


Epoch 717 | Train 0.6960 | Val 0.6066 | LR 0.010000


train 718: 100%|██████████| 58/58 [01:02<00:00,  1.08s/it, av_loss=0.696]
val 718: 100%|██████████| 4/4 [00:07<00:00,  1.76s/it, av_loss=0.597]


Epoch 718 | Train 0.6960 | Val 0.5971 | LR 0.010000


train 719: 100%|██████████| 58/58 [01:04<00:00,  1.11s/it, av_loss=0.695]
val 719: 100%|██████████| 4/4 [00:07<00:00,  1.82s/it, av_loss=0.598]


Epoch 719 | Train 0.6950 | Val 0.5978 | LR 0.010000


train 720: 100%|██████████| 58/58 [01:04<00:00,  1.12s/it, av_loss=0.692]
val 720: 100%|██████████| 4/4 [00:07<00:00,  1.83s/it, av_loss=0.592]


Epoch 720 | Train 0.6923 | Val 0.5915 | LR 0.010000


train 721: 100%|██████████| 58/58 [01:05<00:00,  1.13s/it, av_loss=0.694]
val 721: 100%|██████████| 4/4 [00:07<00:00,  1.77s/it, av_loss=0.593]


Epoch 721 | Train 0.6943 | Val 0.5934 | LR 0.010000


train 722: 100%|██████████| 58/58 [01:01<00:00,  1.06s/it, av_loss=0.692]
val 722: 100%|██████████| 4/4 [00:06<00:00,  1.67s/it, av_loss=0.593]


Epoch 722 | Train 0.6915 | Val 0.5933 | LR 0.010000


train 723: 100%|██████████| 58/58 [01:01<00:00,  1.06s/it, av_loss=0.691]
val 723: 100%|██████████| 4/4 [00:06<00:00,  1.64s/it, av_loss=0.594]


Epoch 723 | Train 0.6913 | Val 0.5937 | LR 0.010000


train 724: 100%|██████████| 58/58 [01:00<00:00,  1.04s/it, av_loss=0.691]
val 724: 100%|██████████| 4/4 [00:06<00:00,  1.68s/it, av_loss=0.595]


Epoch 724 | Train 0.6907 | Val 0.5946 | LR 0.010000


train 725: 100%|██████████| 58/58 [01:00<00:00,  1.04s/it, av_loss=0.691]
val 725: 100%|██████████| 4/4 [00:06<00:00,  1.67s/it, av_loss=0.587]


Epoch 725 | Train 0.6912 | Val 0.5870 | LR 0.010000


train 726: 100%|██████████| 58/58 [01:00<00:00,  1.04s/it, av_loss=0.69] 
val 726: 100%|██████████| 4/4 [00:06<00:00,  1.67s/it, av_loss=0.591]


Epoch 726 | Train 0.6904 | Val 0.5911 | LR 0.010000


train 727: 100%|██████████| 58/58 [00:59<00:00,  1.02s/it, av_loss=0.69] 
val 727: 100%|██████████| 4/4 [00:06<00:00,  1.64s/it, av_loss=0.59] 


Epoch 727 | Train 0.6902 | Val 0.5898 | LR 0.010000


train 728: 100%|██████████| 58/58 [01:00<00:00,  1.03s/it, av_loss=0.688]
val 728: 100%|██████████| 4/4 [00:06<00:00,  1.67s/it, av_loss=0.589]


Epoch 728 | Train 0.6884 | Val 0.5890 | LR 0.010000


train 729: 100%|██████████| 58/58 [00:59<00:00,  1.03s/it, av_loss=0.687]
val 729: 100%|██████████| 4/4 [00:06<00:00,  1.65s/it, av_loss=0.591]


Epoch 729 | Train 0.6871 | Val 0.5907 | LR 0.010000


train 730: 100%|██████████| 58/58 [00:59<00:00,  1.03s/it, av_loss=0.689]
val 730: 100%|██████████| 4/4 [00:06<00:00,  1.65s/it, av_loss=0.594]


Epoch 730 | Train 0.6887 | Val 0.5938 | LR 0.010000


train 731: 100%|██████████| 58/58 [01:00<00:00,  1.04s/it, av_loss=0.688]
val 731: 100%|██████████| 4/4 [00:07<00:00,  1.82s/it, av_loss=0.595]


Epoch 731 | Train 0.6885 | Val 0.5953 | LR 0.010000


train 732: 100%|██████████| 58/58 [01:04<00:00,  1.11s/it, av_loss=0.687]
val 732: 100%|██████████| 4/4 [00:07<00:00,  1.76s/it, av_loss=0.589]


Epoch 732 | Train 0.6874 | Val 0.5888 | LR 0.010000


train 733: 100%|██████████| 58/58 [01:06<00:00,  1.14s/it, av_loss=0.687]
val 733: 100%|██████████| 4/4 [00:07<00:00,  1.79s/it, av_loss=0.592]


Epoch 733 | Train 0.6867 | Val 0.5920 | LR 0.010000


train 734: 100%|██████████| 58/58 [01:04<00:00,  1.12s/it, av_loss=0.689]
val 734: 100%|██████████| 4/4 [00:07<00:00,  1.75s/it, av_loss=0.588]


Epoch 734 | Train 0.6886 | Val 0.5878 | LR 0.010000


train 735: 100%|██████████| 58/58 [01:05<00:00,  1.13s/it, av_loss=0.687]
val 735: 100%|██████████| 4/4 [00:07<00:00,  1.86s/it, av_loss=0.585]


Epoch 735 | Train 0.6871 | Val 0.5849 | LR 0.010000


train 736: 100%|██████████| 58/58 [01:04<00:00,  1.11s/it, av_loss=0.689]
val 736: 100%|██████████| 4/4 [00:07<00:00,  1.84s/it, av_loss=0.588]


Epoch 736 | Train 0.6895 | Val 0.5881 | LR 0.010000


train 737: 100%|██████████| 58/58 [01:05<00:00,  1.13s/it, av_loss=0.691]
val 737: 100%|██████████| 4/4 [00:07<00:00,  1.86s/it, av_loss=0.589]


Epoch 737 | Train 0.6910 | Val 0.5891 | LR 0.010000


train 738: 100%|██████████| 58/58 [01:05<00:00,  1.13s/it, av_loss=0.688]
val 738: 100%|██████████| 4/4 [00:07<00:00,  1.84s/it, av_loss=0.593]


Epoch 738 | Train 0.6878 | Val 0.5931 | LR 0.010000


train 739: 100%|██████████| 58/58 [01:07<00:00,  1.16s/it, av_loss=0.685]
val 739: 100%|██████████| 4/4 [00:07<00:00,  1.83s/it, av_loss=0.585]


Epoch 739 | Train 0.6849 | Val 0.5846 | LR 0.010000


train 740: 100%|██████████| 58/58 [01:06<00:00,  1.14s/it, av_loss=0.687]
val 740: 100%|██████████| 4/4 [00:07<00:00,  1.81s/it, av_loss=0.583]


Epoch 740 | Train 0.6867 | Val 0.5834 | LR 0.010000


train 741: 100%|██████████| 58/58 [01:06<00:00,  1.15s/it, av_loss=0.685]
val 741: 100%|██████████| 4/4 [00:07<00:00,  1.86s/it, av_loss=0.584]


Epoch 741 | Train 0.6852 | Val 0.5840 | LR 0.010000


train 742: 100%|██████████| 58/58 [01:05<00:00,  1.12s/it, av_loss=0.685]
val 742: 100%|██████████| 4/4 [00:07<00:00,  1.87s/it, av_loss=0.584]


Epoch 742 | Train 0.6848 | Val 0.5844 | LR 0.010000


train 743: 100%|██████████| 58/58 [01:06<00:00,  1.15s/it, av_loss=0.686]
val 743: 100%|██████████| 4/4 [00:07<00:00,  1.89s/it, av_loss=0.591]


Epoch 743 | Train 0.6861 | Val 0.5908 | LR 0.010000


train 744: 100%|██████████| 58/58 [01:07<00:00,  1.17s/it, av_loss=0.684]
val 744: 100%|██████████| 4/4 [00:07<00:00,  1.88s/it, av_loss=0.582]


Epoch 744 | Train 0.6840 | Val 0.5819 | LR 0.010000


train 745: 100%|██████████| 58/58 [01:06<00:00,  1.14s/it, av_loss=0.683]
val 745: 100%|██████████| 4/4 [00:07<00:00,  1.90s/it, av_loss=0.588]


Epoch 745 | Train 0.6834 | Val 0.5883 | LR 0.010000


train 746: 100%|██████████| 58/58 [01:06<00:00,  1.15s/it, av_loss=0.684]
val 746: 100%|██████████| 4/4 [00:07<00:00,  1.84s/it, av_loss=0.585]


Epoch 746 | Train 0.6844 | Val 0.5847 | LR 0.010000


train 747: 100%|██████████| 58/58 [01:07<00:00,  1.16s/it, av_loss=0.684]
val 747: 100%|██████████| 4/4 [00:07<00:00,  1.80s/it, av_loss=0.582]


Epoch 747 | Train 0.6845 | Val 0.5824 | LR 0.010000


train 748: 100%|██████████| 58/58 [01:05<00:00,  1.13s/it, av_loss=0.692]
val 748: 100%|██████████| 4/4 [00:07<00:00,  1.78s/it, av_loss=0.583]


Epoch 748 | Train 0.6917 | Val 0.5828 | LR 0.010000


train 749: 100%|██████████| 58/58 [01:06<00:00,  1.15s/it, av_loss=0.687]
val 749: 100%|██████████| 4/4 [00:07<00:00,  1.86s/it, av_loss=0.584]


Epoch 749 | Train 0.6873 | Val 0.5837 | LR 0.010000


train 750: 100%|██████████| 58/58 [01:06<00:00,  1.15s/it, av_loss=0.685]
val 750: 100%|██████████| 4/4 [00:07<00:00,  1.77s/it, av_loss=0.584]


Epoch 750 | Train 0.6855 | Val 0.5835 | LR 0.010000


train 751: 100%|██████████| 58/58 [01:07<00:00,  1.17s/it, av_loss=0.686]
val 751: 100%|██████████| 4/4 [00:07<00:00,  1.92s/it, av_loss=0.585]


Epoch 751 | Train 0.6863 | Val 0.5852 | LR 0.010000


train 752: 100%|██████████| 58/58 [01:05<00:00,  1.13s/it, av_loss=0.684]
val 752: 100%|██████████| 4/4 [00:07<00:00,  1.81s/it, av_loss=0.583]


Epoch 752 | Train 0.6838 | Val 0.5834 | LR 0.010000


train 753: 100%|██████████| 58/58 [01:06<00:00,  1.14s/it, av_loss=0.684]
val 753: 100%|██████████| 4/4 [00:07<00:00,  1.89s/it, av_loss=0.58] 


Epoch 753 | Train 0.6840 | Val 0.5803 | LR 0.010000


train 754: 100%|██████████| 58/58 [01:08<00:00,  1.18s/it, av_loss=0.683]
val 754: 100%|██████████| 4/4 [00:07<00:00,  1.92s/it, av_loss=0.583]


Epoch 754 | Train 0.6830 | Val 0.5830 | LR 0.010000


train 755: 100%|██████████| 58/58 [01:06<00:00,  1.14s/it, av_loss=0.685]
val 755: 100%|██████████| 4/4 [00:07<00:00,  1.77s/it, av_loss=0.581]


Epoch 755 | Train 0.6848 | Val 0.5810 | LR 0.010000


train 756: 100%|██████████| 58/58 [01:07<00:00,  1.16s/it, av_loss=0.683]
val 756: 100%|██████████| 4/4 [00:07<00:00,  1.87s/it, av_loss=0.584]


Epoch 756 | Train 0.6828 | Val 0.5844 | LR 0.010000


train 757: 100%|██████████| 58/58 [01:06<00:00,  1.15s/it, av_loss=0.685]
val 757: 100%|██████████| 4/4 [00:07<00:00,  1.88s/it, av_loss=0.583]


Epoch 757 | Train 0.6849 | Val 0.5831 | LR 0.010000


train 758: 100%|██████████| 58/58 [01:07<00:00,  1.16s/it, av_loss=0.684]
val 758: 100%|██████████| 4/4 [00:07<00:00,  1.76s/it, av_loss=0.583]


Epoch 758 | Train 0.6836 | Val 0.5825 | LR 0.010000


train 759: 100%|██████████| 58/58 [01:06<00:00,  1.15s/it, av_loss=0.684]
val 759: 100%|██████████| 4/4 [00:07<00:00,  1.88s/it, av_loss=0.58] 


Epoch 759 | Train 0.6842 | Val 0.5805 | LR 0.010000


train 760: 100%|██████████| 58/58 [01:07<00:00,  1.16s/it, av_loss=0.684]
val 760: 100%|██████████| 4/4 [00:07<00:00,  1.82s/it, av_loss=0.582]


Epoch 760 | Train 0.6836 | Val 0.5824 | LR 0.010000


train 761: 100%|██████████| 58/58 [01:06<00:00,  1.15s/it, av_loss=0.684]
val 761: 100%|██████████| 4/4 [00:07<00:00,  1.87s/it, av_loss=0.58] 


Epoch 761 | Train 0.6841 | Val 0.5804 | LR 0.010000


train 762: 100%|██████████| 58/58 [01:06<00:00,  1.15s/it, av_loss=0.683]
val 762: 100%|██████████| 4/4 [00:07<00:00,  1.83s/it, av_loss=0.582]


Epoch 762 | Train 0.6833 | Val 0.5822 | LR 0.010000


train 763: 100%|██████████| 58/58 [01:05<00:00,  1.13s/it, av_loss=0.682]
val 763: 100%|██████████| 4/4 [00:07<00:00,  1.81s/it, av_loss=0.585]


Epoch 763 | Train 0.6824 | Val 0.5847 | LR 0.010000


train 764: 100%|██████████| 58/58 [01:04<00:00,  1.12s/it, av_loss=0.684]
val 764: 100%|██████████| 4/4 [00:07<00:00,  1.81s/it, av_loss=0.583]


Epoch 764 | Train 0.6843 | Val 0.5825 | LR 0.002500


train 765: 100%|██████████| 58/58 [01:06<00:00,  1.15s/it, av_loss=0.676]
val 765: 100%|██████████| 4/4 [00:07<00:00,  1.88s/it, av_loss=0.573]


Epoch 765 | Train 0.6757 | Val 0.5730 | LR 0.002500


train 766: 100%|██████████| 58/58 [01:07<00:00,  1.16s/it, av_loss=0.67] 
val 766: 100%|██████████| 4/4 [00:07<00:00,  1.87s/it, av_loss=0.573]


Epoch 766 | Train 0.6700 | Val 0.5732 | LR 0.002500


train 767: 100%|██████████| 58/58 [01:06<00:00,  1.15s/it, av_loss=0.671]
val 767: 100%|██████████| 4/4 [00:07<00:00,  1.85s/it, av_loss=0.571]


Epoch 767 | Train 0.6710 | Val 0.5710 | LR 0.002500


train 768: 100%|██████████| 58/58 [01:07<00:00,  1.17s/it, av_loss=0.67] 
val 768: 100%|██████████| 4/4 [00:07<00:00,  1.90s/it, av_loss=0.571]


Epoch 768 | Train 0.6697 | Val 0.5710 | LR 0.002500


train 769: 100%|██████████| 58/58 [01:06<00:00,  1.15s/it, av_loss=0.669]
val 769: 100%|██████████| 4/4 [00:07<00:00,  1.81s/it, av_loss=0.573]


Epoch 769 | Train 0.6687 | Val 0.5734 | LR 0.002500


train 770: 100%|██████████| 58/58 [01:07<00:00,  1.16s/it, av_loss=0.669]
val 770: 100%|██████████| 4/4 [00:07<00:00,  1.77s/it, av_loss=0.572]


Epoch 770 | Train 0.6691 | Val 0.5720 | LR 0.002500


train 771: 100%|██████████| 58/58 [01:06<00:00,  1.15s/it, av_loss=0.67] 
val 771: 100%|██████████| 4/4 [00:07<00:00,  1.88s/it, av_loss=0.572]


Epoch 771 | Train 0.6705 | Val 0.5715 | LR 0.002500


train 772: 100%|██████████| 58/58 [01:07<00:00,  1.16s/it, av_loss=0.668]
val 772: 100%|██████████| 4/4 [00:07<00:00,  1.86s/it, av_loss=0.569]


Epoch 772 | Train 0.6681 | Val 0.5693 | LR 0.002500


train 773: 100%|██████████| 58/58 [01:05<00:00,  1.12s/it, av_loss=0.67] 
val 773: 100%|██████████| 4/4 [00:07<00:00,  1.87s/it, av_loss=0.574]


Epoch 773 | Train 0.6700 | Val 0.5741 | LR 0.002500


train 774: 100%|██████████| 58/58 [01:05<00:00,  1.13s/it, av_loss=0.669]
val 774: 100%|██████████| 4/4 [00:07<00:00,  1.84s/it, av_loss=0.571]


Epoch 774 | Train 0.6689 | Val 0.5713 | LR 0.002500


train 775: 100%|██████████| 58/58 [01:07<00:00,  1.16s/it, av_loss=0.67] 
val 775: 100%|██████████| 4/4 [00:07<00:00,  1.89s/it, av_loss=0.572]


Epoch 775 | Train 0.6697 | Val 0.5724 | LR 0.002500


train 776: 100%|██████████| 58/58 [01:05<00:00,  1.13s/it, av_loss=0.669]
val 776: 100%|██████████| 4/4 [00:07<00:00,  1.81s/it, av_loss=0.572]


Epoch 776 | Train 0.6687 | Val 0.5724 | LR 0.002500


train 777: 100%|██████████| 58/58 [01:05<00:00,  1.13s/it, av_loss=0.669]
val 777: 100%|██████████| 4/4 [00:07<00:00,  1.77s/it, av_loss=0.57] 


Epoch 777 | Train 0.6687 | Val 0.5704 | LR 0.002500


train 778: 100%|██████████| 58/58 [01:06<00:00,  1.15s/it, av_loss=0.67] 
val 778: 100%|██████████| 4/4 [00:07<00:00,  1.93s/it, av_loss=0.571]


Epoch 778 | Train 0.6700 | Val 0.5710 | LR 0.002500


train 779: 100%|██████████| 58/58 [01:05<00:00,  1.14s/it, av_loss=0.669]
val 779: 100%|██████████| 4/4 [00:07<00:00,  1.82s/it, av_loss=0.57] 


Epoch 779 | Train 0.6689 | Val 0.5696 | LR 0.002500


train 780: 100%|██████████| 58/58 [01:05<00:00,  1.14s/it, av_loss=0.67] 
val 780: 100%|██████████| 4/4 [00:07<00:00,  1.79s/it, av_loss=0.571]


Epoch 780 | Train 0.6697 | Val 0.5710 | LR 0.002500


train 781: 100%|██████████| 58/58 [01:05<00:00,  1.13s/it, av_loss=0.668]
val 781: 100%|██████████| 4/4 [00:07<00:00,  1.82s/it, av_loss=0.574]


Epoch 781 | Train 0.6680 | Val 0.5745 | LR 0.002500


train 782: 100%|██████████| 58/58 [01:06<00:00,  1.14s/it, av_loss=0.669]
val 782: 100%|██████████| 4/4 [00:07<00:00,  1.89s/it, av_loss=0.57] 


Epoch 782 | Train 0.6688 | Val 0.5703 | LR 0.002500


train 783: 100%|██████████| 58/58 [01:06<00:00,  1.15s/it, av_loss=0.667]
val 783: 100%|██████████| 4/4 [00:07<00:00,  1.87s/it, av_loss=0.571]


Epoch 783 | Train 0.6673 | Val 0.5714 | LR 0.000625


train 784: 100%|██████████| 58/58 [01:07<00:00,  1.16s/it, av_loss=0.666]
val 784: 100%|██████████| 4/4 [00:07<00:00,  1.85s/it, av_loss=0.568]


Epoch 784 | Train 0.6663 | Val 0.5679 | LR 0.000625


train 785: 100%|██████████| 58/58 [01:05<00:00,  1.14s/it, av_loss=0.665]
val 785: 100%|██████████| 4/4 [00:07<00:00,  1.82s/it, av_loss=0.568]


Epoch 785 | Train 0.6650 | Val 0.5685 | LR 0.000625


train 786: 100%|██████████| 58/58 [01:05<00:00,  1.13s/it, av_loss=0.665]
val 786: 100%|██████████| 4/4 [00:07<00:00,  1.82s/it, av_loss=0.567]


Epoch 786 | Train 0.6647 | Val 0.5672 | LR 0.000625


train 787: 100%|██████████| 58/58 [01:06<00:00,  1.15s/it, av_loss=0.665]
val 787: 100%|██████████| 4/4 [00:07<00:00,  1.90s/it, av_loss=0.568]


Epoch 787 | Train 0.6647 | Val 0.5675 | LR 0.000625


train 788: 100%|██████████| 58/58 [01:06<00:00,  1.14s/it, av_loss=0.665]
val 788: 100%|██████████| 4/4 [00:07<00:00,  1.81s/it, av_loss=0.569]


Epoch 788 | Train 0.6650 | Val 0.5688 | LR 0.000625


train 789: 100%|██████████| 58/58 [01:07<00:00,  1.17s/it, av_loss=0.667]
val 789: 100%|██████████| 4/4 [00:07<00:00,  1.84s/it, av_loss=0.571]


Epoch 789 | Train 0.6666 | Val 0.5705 | LR 0.000625


train 790: 100%|██████████| 58/58 [01:05<00:00,  1.13s/it, av_loss=0.665]
val 790: 100%|██████████| 4/4 [00:07<00:00,  1.82s/it, av_loss=0.567]


Epoch 790 | Train 0.6648 | Val 0.5668 | LR 0.000625


train 791: 100%|██████████| 58/58 [01:07<00:00,  1.16s/it, av_loss=0.665]
val 791: 100%|██████████| 4/4 [00:07<00:00,  1.90s/it, av_loss=0.567]


Epoch 791 | Train 0.6647 | Val 0.5672 | LR 0.000625


train 792: 100%|██████████| 58/58 [01:07<00:00,  1.16s/it, av_loss=0.665]
val 792: 100%|██████████| 4/4 [00:07<00:00,  1.87s/it, av_loss=0.569]


Epoch 792 | Train 0.6653 | Val 0.5693 | LR 0.000625


train 793: 100%|██████████| 58/58 [01:07<00:00,  1.16s/it, av_loss=0.664]
val 793: 100%|██████████| 4/4 [00:07<00:00,  1.77s/it, av_loss=0.57] 


Epoch 793 | Train 0.6643 | Val 0.5697 | LR 0.000625


train 794: 100%|██████████| 58/58 [01:07<00:00,  1.16s/it, av_loss=0.664]
val 794: 100%|██████████| 4/4 [00:07<00:00,  1.92s/it, av_loss=0.568]


Epoch 794 | Train 0.6645 | Val 0.5681 | LR 0.000625


train 795: 100%|██████████| 58/58 [01:07<00:00,  1.16s/it, av_loss=0.664]
val 795: 100%|██████████| 4/4 [00:07<00:00,  1.81s/it, av_loss=0.567]


Epoch 795 | Train 0.6641 | Val 0.5669 | LR 0.000625


train 796: 100%|██████████| 58/58 [01:06<00:00,  1.14s/it, av_loss=0.664]
val 796: 100%|██████████| 4/4 [00:07<00:00,  1.89s/it, av_loss=0.566]


Epoch 796 | Train 0.6644 | Val 0.5657 | LR 0.000625


train 797: 100%|██████████| 58/58 [01:06<00:00,  1.14s/it, av_loss=0.665]
val 797: 100%|██████████| 4/4 [00:07<00:00,  1.83s/it, av_loss=0.567]


Epoch 797 | Train 0.6651 | Val 0.5668 | LR 0.000625


train 798: 100%|██████████| 58/58 [01:05<00:00,  1.13s/it, av_loss=0.663]
val 798: 100%|██████████| 4/4 [00:07<00:00,  1.77s/it, av_loss=0.566]


Epoch 798 | Train 0.6631 | Val 0.5664 | LR 0.000625


train 799: 100%|██████████| 58/58 [01:05<00:00,  1.13s/it, av_loss=0.664]
val 799: 100%|██████████| 4/4 [00:07<00:00,  1.80s/it, av_loss=0.567]


Epoch 799 | Train 0.6637 | Val 0.5667 | LR 0.000625


train 800: 100%|██████████| 58/58 [01:05<00:00,  1.13s/it, av_loss=0.663]
val 800: 100%|██████████| 4/4 [00:07<00:00,  1.81s/it, av_loss=0.567]


Epoch 800 | Train 0.6634 | Val 0.5665 | LR 0.000625


train 801: 100%|██████████| 58/58 [01:06<00:00,  1.14s/it, av_loss=0.663]
val 801: 100%|██████████| 4/4 [00:07<00:00,  1.78s/it, av_loss=0.566]


Epoch 801 | Train 0.6631 | Val 0.5657 | LR 0.000625


train 802: 100%|██████████| 58/58 [01:07<00:00,  1.16s/it, av_loss=0.665]
val 802: 100%|██████████| 4/4 [00:07<00:00,  1.88s/it, av_loss=0.567]


Epoch 802 | Train 0.6645 | Val 0.5667 | LR 0.000625


train 803: 100%|██████████| 58/58 [01:07<00:00,  1.16s/it, av_loss=0.664]
val 803: 100%|██████████| 4/4 [00:07<00:00,  1.87s/it, av_loss=0.566]


Epoch 803 | Train 0.6635 | Val 0.5663 | LR 0.000625


train 804: 100%|██████████| 58/58 [01:05<00:00,  1.13s/it, av_loss=0.663]
val 804: 100%|██████████| 4/4 [00:07<00:00,  1.79s/it, av_loss=0.567]


Epoch 804 | Train 0.6630 | Val 0.5675 | LR 0.000625


train 805: 100%|██████████| 58/58 [01:06<00:00,  1.15s/it, av_loss=0.665]
val 805: 100%|██████████| 4/4 [00:07<00:00,  1.81s/it, av_loss=0.567]


Epoch 805 | Train 0.6647 | Val 0.5675 | LR 0.000625


train 806: 100%|██████████| 58/58 [01:07<00:00,  1.16s/it, av_loss=0.664]
val 806: 100%|██████████| 4/4 [00:07<00:00,  1.92s/it, av_loss=0.567]


Epoch 806 | Train 0.6638 | Val 0.5671 | LR 0.000625


train 807: 100%|██████████| 58/58 [01:05<00:00,  1.13s/it, av_loss=0.664]
val 807: 100%|██████████| 4/4 [00:07<00:00,  1.85s/it, av_loss=0.568]


Epoch 807 | Train 0.6637 | Val 0.5683 | LR 0.000156


train 808: 100%|██████████| 58/58 [01:07<00:00,  1.16s/it, av_loss=0.663]
val 808: 100%|██████████| 4/4 [00:07<00:00,  1.86s/it, av_loss=0.566]


Epoch 808 | Train 0.6627 | Val 0.5660 | LR 0.000156


train 809: 100%|██████████| 58/58 [01:06<00:00,  1.15s/it, av_loss=0.663]
val 809: 100%|██████████| 4/4 [00:07<00:00,  1.87s/it, av_loss=0.567]


Epoch 809 | Train 0.6627 | Val 0.5673 | LR 0.000156


train 810: 100%|██████████| 58/58 [01:07<00:00,  1.17s/it, av_loss=0.662]
val 810: 100%|██████████| 4/4 [00:07<00:00,  1.88s/it, av_loss=0.568]


Epoch 810 | Train 0.6625 | Val 0.5680 | LR 0.000156


train 811: 100%|██████████| 58/58 [01:06<00:00,  1.15s/it, av_loss=0.662]
val 811: 100%|██████████| 4/4 [00:07<00:00,  1.84s/it, av_loss=0.567]


Epoch 811 | Train 0.6620 | Val 0.5665 | LR 0.000156


train 812: 100%|██████████| 58/58 [01:06<00:00,  1.14s/it, av_loss=0.662]
val 812: 100%|██████████| 4/4 [00:07<00:00,  1.78s/it, av_loss=0.567]


Epoch 812 | Train 0.6621 | Val 0.5673 | LR 0.000156


train 813: 100%|██████████| 58/58 [01:05<00:00,  1.13s/it, av_loss=0.663]
val 813: 100%|██████████| 4/4 [00:07<00:00,  1.85s/it, av_loss=0.566]


Epoch 813 | Train 0.6629 | Val 0.5660 | LR 0.000156


train 814: 100%|██████████| 58/58 [01:06<00:00,  1.15s/it, av_loss=0.663]
val 814: 100%|██████████| 4/4 [00:07<00:00,  1.88s/it, av_loss=0.566]


Epoch 814 | Train 0.6630 | Val 0.5659 | LR 0.000156


train 815: 100%|██████████| 58/58 [01:08<00:00,  1.17s/it, av_loss=0.663]
val 815: 100%|██████████| 4/4 [00:07<00:00,  1.89s/it, av_loss=0.566]


Epoch 815 | Train 0.6627 | Val 0.5661 | LR 0.000156


train 816: 100%|██████████| 58/58 [01:07<00:00,  1.16s/it, av_loss=0.663]
val 816: 100%|██████████| 4/4 [00:07<00:00,  1.86s/it, av_loss=0.566]


Epoch 816 | Train 0.6631 | Val 0.5662 | LR 0.000156


train 817: 100%|██████████| 58/58 [01:07<00:00,  1.16s/it, av_loss=0.662]
val 817: 100%|██████████| 4/4 [00:07<00:00,  1.85s/it, av_loss=0.565]


Epoch 817 | Train 0.6619 | Val 0.5651 | LR 0.000156


train 818: 100%|██████████| 58/58 [01:06<00:00,  1.15s/it, av_loss=0.664]
val 818: 100%|██████████| 4/4 [00:07<00:00,  1.85s/it, av_loss=0.567]


Epoch 818 | Train 0.6636 | Val 0.5669 | LR 0.000156


train 819: 100%|██████████| 58/58 [01:07<00:00,  1.16s/it, av_loss=0.662]
val 819: 100%|██████████| 4/4 [00:07<00:00,  1.90s/it, av_loss=0.566]


Epoch 819 | Train 0.6624 | Val 0.5659 | LR 0.000156


train 820: 100%|██████████| 58/58 [01:04<00:00,  1.12s/it, av_loss=0.662]
val 820: 100%|██████████| 4/4 [00:07<00:00,  1.87s/it, av_loss=0.566]


Epoch 820 | Train 0.6625 | Val 0.5662 | LR 0.000156


train 821: 100%|██████████| 58/58 [01:08<00:00,  1.18s/it, av_loss=0.662]
val 821: 100%|██████████| 4/4 [00:07<00:00,  1.86s/it, av_loss=0.568]


Epoch 821 | Train 0.6624 | Val 0.5677 | LR 0.000156


train 822: 100%|██████████| 58/58 [01:06<00:00,  1.15s/it, av_loss=0.664]
val 822: 100%|██████████| 4/4 [00:07<00:00,  1.80s/it, av_loss=0.566]


Epoch 822 | Train 0.6636 | Val 0.5665 | LR 0.000156


train 823: 100%|██████████| 58/58 [01:05<00:00,  1.13s/it, av_loss=0.664]
val 823: 100%|██████████| 4/4 [00:07<00:00,  1.93s/it, av_loss=0.567]


Epoch 823 | Train 0.6637 | Val 0.5669 | LR 0.000156


train 824: 100%|██████████| 58/58 [01:05<00:00,  1.13s/it, av_loss=0.664]
val 824: 100%|██████████| 4/4 [00:07<00:00,  1.82s/it, av_loss=0.569]


Epoch 824 | Train 0.6638 | Val 0.5686 | LR 0.000156


train 825: 100%|██████████| 58/58 [01:07<00:00,  1.17s/it, av_loss=0.663]
val 825: 100%|██████████| 4/4 [00:07<00:00,  1.88s/it, av_loss=0.566]


Epoch 825 | Train 0.6627 | Val 0.5662 | LR 0.000156


train 826: 100%|██████████| 58/58 [01:07<00:00,  1.16s/it, av_loss=0.662]
val 826: 100%|██████████| 4/4 [00:07<00:00,  1.88s/it, av_loss=0.565]


Epoch 826 | Train 0.6623 | Val 0.5653 | LR 0.000156


train 827: 100%|██████████| 58/58 [01:06<00:00,  1.14s/it, av_loss=0.663]
val 827: 100%|██████████| 4/4 [00:07<00:00,  1.89s/it, av_loss=0.568]


Epoch 827 | Train 0.6628 | Val 0.5675 | LR 0.000156


train 828: 100%|██████████| 58/58 [01:07<00:00,  1.16s/it, av_loss=0.662]
val 828: 100%|██████████| 4/4 [00:07<00:00,  1.88s/it, av_loss=0.567]


Epoch 828 | Train 0.6623 | Val 0.5670 | LR 0.000039


train 829: 100%|██████████| 58/58 [01:08<00:00,  1.18s/it, av_loss=0.663]
val 829: 100%|██████████| 4/4 [00:07<00:00,  1.91s/it, av_loss=0.567]


Epoch 829 | Train 0.6625 | Val 0.5670 | LR 0.000039


train 830: 100%|██████████| 58/58 [01:06<00:00,  1.15s/it, av_loss=0.663]
val 830: 100%|██████████| 4/4 [00:07<00:00,  1.84s/it, av_loss=0.566]


Epoch 830 | Train 0.6635 | Val 0.5664 | LR 0.000039


train 831: 100%|██████████| 58/58 [01:08<00:00,  1.18s/it, av_loss=0.663]
val 831: 100%|██████████| 4/4 [00:07<00:00,  1.90s/it, av_loss=0.567]


Epoch 831 | Train 0.6627 | Val 0.5675 | LR 0.000039


train 832: 100%|██████████| 58/58 [01:07<00:00,  1.17s/it, av_loss=0.662]
val 832: 100%|██████████| 4/4 [00:07<00:00,  1.88s/it, av_loss=0.567]


Epoch 832 | Train 0.6621 | Val 0.5669 | LR 0.000039


train 833: 100%|██████████| 58/58 [01:06<00:00,  1.15s/it, av_loss=0.662]
val 833: 100%|██████████| 4/4 [00:07<00:00,  1.90s/it, av_loss=0.566]


Epoch 833 | Train 0.6623 | Val 0.5660 | LR 0.000039


train 834: 100%|██████████| 58/58 [01:06<00:00,  1.14s/it, av_loss=0.663]
val 834: 100%|██████████| 4/4 [00:07<00:00,  1.84s/it, av_loss=0.566]


Epoch 834 | Train 0.6633 | Val 0.5665 | LR 0.000039


train 835: 100%|██████████| 58/58 [01:06<00:00,  1.15s/it, av_loss=0.662]
val 835: 100%|██████████| 4/4 [00:07<00:00,  1.92s/it, av_loss=0.567]


Epoch 835 | Train 0.6624 | Val 0.5666 | LR 0.000039


train 836: 100%|██████████| 58/58 [01:08<00:00,  1.18s/it, av_loss=0.663]
val 836: 100%|██████████| 4/4 [00:07<00:00,  1.84s/it, av_loss=0.567]


Epoch 836 | Train 0.6628 | Val 0.5668 | LR 0.000039


train 837: 100%|██████████| 58/58 [01:08<00:00,  1.18s/it, av_loss=0.662]
val 837: 100%|██████████| 4/4 [00:07<00:00,  1.84s/it, av_loss=0.567]


Epoch 837 | Train 0.6617 | Val 0.5673 | LR 0.000039


train 838: 100%|██████████| 58/58 [01:06<00:00,  1.14s/it, av_loss=0.662]
val 838: 100%|██████████| 4/4 [00:07<00:00,  1.81s/it, av_loss=0.567]


Epoch 838 | Train 0.6616 | Val 0.5670 | LR 0.000039


train 839: 100%|██████████| 58/58 [01:07<00:00,  1.16s/it, av_loss=0.663]
val 839: 100%|██████████| 4/4 [00:07<00:00,  1.86s/it, av_loss=0.567]


Epoch 839 | Train 0.6628 | Val 0.5674 | LR 0.000010


train 840: 100%|██████████| 58/58 [01:06<00:00,  1.15s/it, av_loss=0.661]
val 840: 100%|██████████| 4/4 [00:07<00:00,  1.93s/it, av_loss=0.567]


Epoch 840 | Train 0.6611 | Val 0.5666 | LR 0.000010


train 841: 100%|██████████| 58/58 [01:06<00:00,  1.15s/it, av_loss=0.661]
val 841: 100%|██████████| 4/4 [00:07<00:00,  1.82s/it, av_loss=0.566]


Epoch 841 | Train 0.6614 | Val 0.5660 | LR 0.000010


train 842: 100%|██████████| 58/58 [01:06<00:00,  1.14s/it, av_loss=0.663]
val 842: 100%|██████████| 4/4 [00:07<00:00,  1.83s/it, av_loss=0.566]


Epoch 842 | Train 0.6631 | Val 0.5664 | LR 0.000010


train 843: 100%|██████████| 58/58 [01:07<00:00,  1.17s/it, av_loss=0.663]
val 843: 100%|██████████| 4/4 [00:07<00:00,  1.79s/it, av_loss=0.567]


Epoch 843 | Train 0.6629 | Val 0.5666 | LR 0.000010


train 844: 100%|██████████| 58/58 [01:06<00:00,  1.15s/it, av_loss=0.663]
val 844: 100%|██████████| 4/4 [00:07<00:00,  1.88s/it, av_loss=0.565]


Epoch 844 | Train 0.6627 | Val 0.5652 | LR 0.000010


train 845: 100%|██████████| 58/58 [01:07<00:00,  1.16s/it, av_loss=0.662]
val 845: 100%|██████████| 4/4 [00:07<00:00,  1.91s/it, av_loss=0.566]


Epoch 845 | Train 0.6624 | Val 0.5660 | LR 0.000010


train 846: 100%|██████████| 58/58 [01:07<00:00,  1.17s/it, av_loss=0.662]
val 846: 100%|██████████| 4/4 [00:07<00:00,  1.90s/it, av_loss=0.566]


Epoch 846 | Train 0.6624 | Val 0.5664 | LR 0.000010


train 847: 100%|██████████| 58/58 [01:06<00:00,  1.15s/it, av_loss=0.663]
val 847: 100%|██████████| 4/4 [00:07<00:00,  1.88s/it, av_loss=0.568]


Epoch 847 | Train 0.6632 | Val 0.5681 | LR 0.000010


train 848: 100%|██████████| 58/58 [01:07<00:00,  1.16s/it, av_loss=0.663]
val 848: 100%|██████████| 4/4 [00:07<00:00,  1.80s/it, av_loss=0.567]


Epoch 848 | Train 0.6627 | Val 0.5667 | LR 0.000010


train 849: 100%|██████████| 58/58 [01:06<00:00,  1.15s/it, av_loss=0.662]
val 849: 100%|██████████| 4/4 [00:07<00:00,  1.88s/it, av_loss=0.567]


Epoch 849 | Train 0.6624 | Val 0.5666 | LR 0.000010


train 850: 100%|██████████| 58/58 [01:07<00:00,  1.16s/it, av_loss=0.662]
val 850: 100%|██████████| 4/4 [00:07<00:00,  1.81s/it, av_loss=0.566]


Epoch 850 | Train 0.6616 | Val 0.5663 | LR 0.000002


train 851: 100%|██████████| 58/58 [01:07<00:00,  1.16s/it, av_loss=0.662]
val 851: 100%|██████████| 4/4 [00:07<00:00,  1.93s/it, av_loss=0.567]


Epoch 851 | Train 0.6622 | Val 0.5669 | LR 0.000002


train 852: 100%|██████████| 58/58 [01:06<00:00,  1.14s/it, av_loss=0.662]
val 852: 100%|██████████| 4/4 [00:07<00:00,  1.82s/it, av_loss=0.567]


Epoch 852 | Train 0.6616 | Val 0.5667 | LR 0.000002


train 853: 100%|██████████| 58/58 [01:06<00:00,  1.14s/it, av_loss=0.663]
val 853: 100%|██████████| 4/4 [00:07<00:00,  1.90s/it, av_loss=0.567]


Epoch 853 | Train 0.6626 | Val 0.5667 | LR 0.000002


train 854: 100%|██████████| 58/58 [01:06<00:00,  1.15s/it, av_loss=0.663]
val 854: 100%|██████████| 4/4 [00:07<00:00,  1.80s/it, av_loss=0.566]


Epoch 854 | Train 0.6630 | Val 0.5665 | LR 0.000002


train 855: 100%|██████████| 58/58 [01:05<00:00,  1.13s/it, av_loss=0.662]
val 855: 100%|██████████| 4/4 [00:07<00:00,  1.80s/it, av_loss=0.568]


Epoch 855 | Train 0.6619 | Val 0.5675 | LR 0.000002


train 856: 100%|██████████| 58/58 [01:07<00:00,  1.16s/it, av_loss=0.663]
val 856: 100%|██████████| 4/4 [00:07<00:00,  1.90s/it, av_loss=0.568]


Epoch 856 | Train 0.6634 | Val 0.5677 | LR 0.000002


train 857: 100%|██████████| 58/58 [01:07<00:00,  1.16s/it, av_loss=0.663]
val 857: 100%|██████████| 4/4 [00:07<00:00,  1.85s/it, av_loss=0.566]


Epoch 857 | Train 0.6625 | Val 0.5664 | LR 0.000002


train 858: 100%|██████████| 58/58 [01:07<00:00,  1.16s/it, av_loss=0.662]
val 858: 100%|██████████| 4/4 [00:07<00:00,  1.80s/it, av_loss=0.567]


Epoch 858 | Train 0.6619 | Val 0.5669 | LR 0.000002


train 859: 100%|██████████| 58/58 [01:07<00:00,  1.17s/it, av_loss=0.662]
val 859: 100%|██████████| 4/4 [00:07<00:00,  1.84s/it, av_loss=0.567]


Epoch 859 | Train 0.6617 | Val 0.5672 | LR 0.000002


train 860: 100%|██████████| 58/58 [01:08<00:00,  1.18s/it, av_loss=0.662]
val 860: 100%|██████████| 4/4 [00:07<00:00,  1.88s/it, av_loss=0.566]


Epoch 860 | Train 0.6623 | Val 0.5660 | LR 0.000002


train 861: 100%|██████████| 58/58 [01:07<00:00,  1.16s/it, av_loss=0.662]
val 861: 100%|██████████| 4/4 [00:07<00:00,  1.90s/it, av_loss=0.566]


Epoch 861 | Train 0.6623 | Val 0.5659 | LR 0.000001


train 862: 100%|██████████| 58/58 [01:06<00:00,  1.15s/it, av_loss=0.662]
val 862: 100%|██████████| 4/4 [00:07<00:00,  1.85s/it, av_loss=0.567]


Epoch 862 | Train 0.6623 | Val 0.5665 | LR 0.000001


train 863: 100%|██████████| 58/58 [01:06<00:00,  1.15s/it, av_loss=0.662]
val 863: 100%|██████████| 4/4 [00:07<00:00,  1.87s/it, av_loss=0.567]


Epoch 863 | Train 0.6625 | Val 0.5671 | LR 0.000001


train 864: 100%|██████████| 58/58 [01:07<00:00,  1.17s/it, av_loss=0.663]
val 864: 100%|██████████| 4/4 [00:07<00:00,  1.91s/it, av_loss=0.568]


Epoch 864 | Train 0.6628 | Val 0.5676 | LR 0.000001


train 865: 100%|██████████| 58/58 [01:08<00:00,  1.18s/it, av_loss=0.663]
val 865: 100%|██████████| 4/4 [00:07<00:00,  1.86s/it, av_loss=0.566]


Epoch 865 | Train 0.6630 | Val 0.5661 | LR 0.000001


train 866: 100%|██████████| 58/58 [01:07<00:00,  1.17s/it, av_loss=0.662]
val 866: 100%|██████████| 4/4 [00:07<00:00,  1.89s/it, av_loss=0.567]


Epoch 866 | Train 0.6618 | Val 0.5666 | LR 0.000001


train 867: 100%|██████████| 58/58 [01:06<00:00,  1.15s/it, av_loss=0.663]
val 867: 100%|██████████| 4/4 [00:07<00:00,  1.81s/it, av_loss=0.566]


Epoch 867 | Train 0.6629 | Val 0.5663 | LR 0.000001


train 868: 100%|██████████| 58/58 [01:06<00:00,  1.14s/it, av_loss=0.662]
val 868: 100%|██████████| 4/4 [00:07<00:00,  1.83s/it, av_loss=0.566]


Epoch 868 | Train 0.6622 | Val 0.5664 | LR 0.000001


train 869: 100%|██████████| 58/58 [01:07<00:00,  1.16s/it, av_loss=0.662]
val 869: 100%|██████████| 4/4 [00:07<00:00,  1.87s/it, av_loss=0.566]


Epoch 869 | Train 0.6615 | Val 0.5665 | LR 0.000001


train 870: 100%|██████████| 58/58 [01:07<00:00,  1.16s/it, av_loss=0.662]
val 870: 100%|██████████| 4/4 [00:07<00:00,  1.89s/it, av_loss=0.567]


Epoch 870 | Train 0.6618 | Val 0.5668 | LR 0.000001


train 871: 100%|██████████| 58/58 [01:08<00:00,  1.18s/it, av_loss=0.662]
val 871: 100%|██████████| 4/4 [00:07<00:00,  1.83s/it, av_loss=0.567]


Epoch 871 | Train 0.6617 | Val 0.5671 | LR 0.000001


train 872: 100%|██████████| 58/58 [01:06<00:00,  1.14s/it, av_loss=0.661]
val 872: 100%|██████████| 4/4 [00:07<00:00,  1.78s/it, av_loss=0.567]


Epoch 872 | Train 0.6614 | Val 0.5665 | LR 0.000000


train 873: 100%|██████████| 58/58 [01:05<00:00,  1.12s/it, av_loss=0.66] 
val 873: 100%|██████████| 4/4 [00:07<00:00,  1.86s/it, av_loss=0.566]


Epoch 873 | Train 0.6600 | Val 0.5664 | LR 0.000000


train 874: 100%|██████████| 58/58 [01:06<00:00,  1.14s/it, av_loss=0.661]
val 874: 100%|██████████| 4/4 [00:07<00:00,  1.80s/it, av_loss=0.566]


Epoch 874 | Train 0.6608 | Val 0.5662 | LR 0.000000


train 875: 100%|██████████| 58/58 [01:06<00:00,  1.14s/it, av_loss=0.663]
val 875: 100%|██████████| 4/4 [00:07<00:00,  1.86s/it, av_loss=0.567]


Epoch 875 | Train 0.6627 | Val 0.5673 | LR 0.000000


train 876: 100%|██████████| 58/58 [01:07<00:00,  1.16s/it, av_loss=0.663]
val 876: 100%|██████████| 4/4 [00:07<00:00,  1.89s/it, av_loss=0.566]


Epoch 876 | Train 0.6627 | Val 0.5659 | LR 0.000000


train 877: 100%|██████████| 58/58 [01:08<00:00,  1.18s/it, av_loss=0.662]
val 877: 100%|██████████| 4/4 [00:07<00:00,  1.89s/it, av_loss=0.567]


Epoch 877 | Train 0.6617 | Val 0.5668 | LR 0.000000


train 878: 100%|██████████| 58/58 [01:07<00:00,  1.16s/it, av_loss=0.662]
val 878: 100%|██████████| 4/4 [00:07<00:00,  1.85s/it, av_loss=0.566]


Epoch 878 | Train 0.6621 | Val 0.5660 | LR 0.000000


train 879: 100%|██████████| 58/58 [01:07<00:00,  1.16s/it, av_loss=0.662]
val 879: 100%|██████████| 4/4 [00:07<00:00,  1.80s/it, av_loss=0.567]


Epoch 879 | Train 0.6624 | Val 0.5667 | LR 0.000000


train 880: 100%|██████████| 58/58 [01:07<00:00,  1.16s/it, av_loss=0.661]
val 880: 100%|██████████| 4/4 [00:07<00:00,  1.86s/it, av_loss=0.566]


Epoch 880 | Train 0.6615 | Val 0.5661 | LR 0.000000


train 881: 100%|██████████| 58/58 [01:07<00:00,  1.16s/it, av_loss=0.663]
val 881: 100%|██████████| 4/4 [00:07<00:00,  1.83s/it, av_loss=0.566]


Epoch 881 | Train 0.6630 | Val 0.5663 | LR 0.000000


train 882: 100%|██████████| 58/58 [01:07<00:00,  1.16s/it, av_loss=0.663]
val 882: 100%|██████████| 4/4 [00:07<00:00,  1.87s/it, av_loss=0.567]


Epoch 882 | Train 0.6632 | Val 0.5672 | LR 0.000000


train 883: 100%|██████████| 58/58 [01:08<00:00,  1.17s/it, av_loss=0.662]
val 883: 100%|██████████| 4/4 [00:07<00:00,  1.84s/it, av_loss=0.566]


Epoch 883 | Train 0.6615 | Val 0.5658 | LR 0.000000


train 884: 100%|██████████| 58/58 [01:06<00:00,  1.15s/it, av_loss=0.662]
val 884: 100%|██████████| 4/4 [00:07<00:00,  1.81s/it, av_loss=0.567]


Epoch 884 | Train 0.6624 | Val 0.5668 | LR 0.000000


train 885: 100%|██████████| 58/58 [01:05<00:00,  1.14s/it, av_loss=0.663]
val 885: 100%|██████████| 4/4 [00:07<00:00,  1.83s/it, av_loss=0.567]


Epoch 885 | Train 0.6630 | Val 0.5673 | LR 0.000000


train 886: 100%|██████████| 58/58 [01:07<00:00,  1.17s/it, av_loss=0.663]
val 886: 100%|██████████| 4/4 [00:07<00:00,  1.88s/it, av_loss=0.568]


Epoch 886 | Train 0.6626 | Val 0.5676 | LR 0.000000


train 887: 100%|██████████| 58/58 [01:06<00:00,  1.15s/it, av_loss=0.663]
val 887: 100%|██████████| 4/4 [00:07<00:00,  1.85s/it, av_loss=0.566]


Epoch 887 | Train 0.6628 | Val 0.5657 | LR 0.000000


train 888: 100%|██████████| 58/58 [01:06<00:00,  1.15s/it, av_loss=0.662]
val 888: 100%|██████████| 4/4 [00:07<00:00,  1.88s/it, av_loss=0.566]


Epoch 888 | Train 0.6622 | Val 0.5656 | LR 0.000000


train 889: 100%|██████████| 58/58 [01:07<00:00,  1.16s/it, av_loss=0.662]
val 889: 100%|██████████| 4/4 [00:07<00:00,  1.88s/it, av_loss=0.567]


Epoch 889 | Train 0.6623 | Val 0.5667 | LR 0.000000


train 890: 100%|██████████| 58/58 [01:07<00:00,  1.16s/it, av_loss=0.663]
val 890: 100%|██████████| 4/4 [00:07<00:00,  1.84s/it, av_loss=0.567]


Epoch 890 | Train 0.6628 | Val 0.5669 | LR 0.000000


train 891: 100%|██████████| 58/58 [01:07<00:00,  1.17s/it, av_loss=0.661]
val 891: 100%|██████████| 4/4 [00:07<00:00,  1.83s/it, av_loss=0.566]


Epoch 891 | Train 0.6615 | Val 0.5662 | LR 0.000000


train 892: 100%|██████████| 58/58 [01:07<00:00,  1.17s/it, av_loss=0.663]
val 892: 100%|██████████| 4/4 [00:07<00:00,  1.88s/it, av_loss=0.567]


Epoch 892 | Train 0.6625 | Val 0.5673 | LR 0.000000


train 893: 100%|██████████| 58/58 [01:07<00:00,  1.16s/it, av_loss=0.663]
val 893: 100%|██████████| 4/4 [00:07<00:00,  1.90s/it, av_loss=0.566]


Epoch 893 | Train 0.6629 | Val 0.5660 | LR 0.000000


train 894: 100%|██████████| 58/58 [01:06<00:00,  1.15s/it, av_loss=0.662]
val 894: 100%|██████████| 4/4 [00:07<00:00,  1.90s/it, av_loss=0.566]


Epoch 894 | Train 0.6618 | Val 0.5658 | LR 0.000000


train 895: 100%|██████████| 58/58 [01:06<00:00,  1.15s/it, av_loss=0.661]
val 895: 100%|██████████| 4/4 [00:07<00:00,  1.78s/it, av_loss=0.567]


Epoch 895 | Train 0.6613 | Val 0.5669 | LR 0.000000


train 896: 100%|██████████| 58/58 [01:06<00:00,  1.15s/it, av_loss=0.661]
val 896: 100%|██████████| 4/4 [00:07<00:00,  1.85s/it, av_loss=0.567]


Epoch 896 | Train 0.6615 | Val 0.5669 | LR 0.000000


train 897: 100%|██████████| 58/58 [01:07<00:00,  1.16s/it, av_loss=0.662]
val 897: 100%|██████████| 4/4 [00:07<00:00,  1.91s/it, av_loss=0.565]


Epoch 897 | Train 0.6624 | Val 0.5650 | LR 0.000000


train 898: 100%|██████████| 58/58 [01:07<00:00,  1.17s/it, av_loss=0.661]
val 898: 100%|██████████| 4/4 [00:07<00:00,  1.90s/it, av_loss=0.566]


Epoch 898 | Train 0.6612 | Val 0.5659 | LR 0.000000


train 899: 100%|██████████| 58/58 [01:06<00:00,  1.14s/it, av_loss=0.663]
val 899: 100%|██████████| 4/4 [00:07<00:00,  1.87s/it, av_loss=0.567]


Epoch 899 | Train 0.6629 | Val 0.5667 | LR 0.000000


train 900: 100%|██████████| 58/58 [01:06<00:00,  1.15s/it, av_loss=0.662]
val 900: 100%|██████████| 4/4 [00:07<00:00,  1.87s/it, av_loss=0.567]


Epoch 900 | Train 0.6624 | Val 0.5673 | LR 0.000000


train 901: 100%|██████████| 58/58 [01:07<00:00,  1.16s/it, av_loss=0.661]
val 901: 100%|██████████| 4/4 [00:07<00:00,  1.80s/it, av_loss=0.567]


Epoch 901 | Train 0.6615 | Val 0.5668 | LR 0.000000


train 902: 100%|██████████| 58/58 [01:05<00:00,  1.13s/it, av_loss=0.662]
val 902: 100%|██████████| 4/4 [00:07<00:00,  1.87s/it, av_loss=0.567]


Epoch 902 | Train 0.6621 | Val 0.5666 | LR 0.000000


train 903: 100%|██████████| 58/58 [01:08<00:00,  1.18s/it, av_loss=0.663]
val 903: 100%|██████████| 4/4 [00:07<00:00,  1.89s/it, av_loss=0.566]


Epoch 903 | Train 0.6630 | Val 0.5662 | LR 0.000000


train 904: 100%|██████████| 58/58 [01:07<00:00,  1.16s/it, av_loss=0.662]
val 904: 100%|██████████| 4/4 [00:07<00:00,  1.80s/it, av_loss=0.567]


Epoch 904 | Train 0.6620 | Val 0.5669 | LR 0.000000


train 905: 100%|██████████| 58/58 [01:07<00:00,  1.17s/it, av_loss=0.661]
val 905: 100%|██████████| 4/4 [00:07<00:00,  1.88s/it, av_loss=0.568]


Epoch 905 | Train 0.6609 | Val 0.5678 | LR 0.000000


train 906: 100%|██████████| 58/58 [01:06<00:00,  1.15s/it, av_loss=0.661]
val 906: 100%|██████████| 4/4 [00:07<00:00,  1.83s/it, av_loss=0.567]


Epoch 906 | Train 0.6615 | Val 0.5670 | LR 0.000000


train 907: 100%|██████████| 58/58 [01:07<00:00,  1.16s/it, av_loss=0.662]
val 907: 100%|██████████| 4/4 [00:07<00:00,  1.83s/it, av_loss=0.566]


Epoch 907 | Train 0.6617 | Val 0.5664 | LR 0.000000


train 908: 100%|██████████| 58/58 [01:06<00:00,  1.15s/it, av_loss=0.662]
val 908: 100%|██████████| 4/4 [00:07<00:00,  1.90s/it, av_loss=0.568]


Epoch 908 | Train 0.6623 | Val 0.5677 | LR 0.000000


train 909: 100%|██████████| 58/58 [01:06<00:00,  1.14s/it, av_loss=0.664]
val 909: 100%|██████████| 4/4 [00:07<00:00,  1.81s/it, av_loss=0.568]


Epoch 909 | Train 0.6637 | Val 0.5678 | LR 0.000000


train 910: 100%|██████████| 58/58 [01:06<00:00,  1.15s/it, av_loss=0.662]
val 910: 100%|██████████| 4/4 [00:07<00:00,  1.89s/it, av_loss=0.567]


Epoch 910 | Train 0.6620 | Val 0.5668 | LR 0.000000


train 911: 100%|██████████| 58/58 [01:07<00:00,  1.17s/it, av_loss=0.662]
val 911: 100%|██████████| 4/4 [00:07<00:00,  1.93s/it, av_loss=0.567]


Epoch 911 | Train 0.6620 | Val 0.5671 | LR 0.000000


train 912: 100%|██████████| 58/58 [01:08<00:00,  1.18s/it, av_loss=0.663]
val 912: 100%|██████████| 4/4 [00:07<00:00,  1.88s/it, av_loss=0.567]


Epoch 912 | Train 0.6627 | Val 0.5666 | LR 0.000000


train 913: 100%|██████████| 58/58 [01:07<00:00,  1.17s/it, av_loss=0.661]
val 913: 100%|██████████| 4/4 [00:07<00:00,  1.90s/it, av_loss=0.567]


Epoch 913 | Train 0.6610 | Val 0.5672 | LR 0.000000


train 914: 100%|██████████| 58/58 [01:07<00:00,  1.16s/it, av_loss=0.662]
val 914: 100%|██████████| 4/4 [00:07<00:00,  1.82s/it, av_loss=0.566]


Epoch 914 | Train 0.6620 | Val 0.5660 | LR 0.000000


train 915: 100%|██████████| 58/58 [01:06<00:00,  1.15s/it, av_loss=0.661]
val 915: 100%|██████████| 4/4 [00:07<00:00,  1.92s/it, av_loss=0.566]


Epoch 915 | Train 0.6613 | Val 0.5662 | LR 0.000000


train 916: 100%|██████████| 58/58 [01:08<00:00,  1.17s/it, av_loss=0.662]
val 916: 100%|██████████| 4/4 [00:07<00:00,  1.88s/it, av_loss=0.566]


Epoch 916 | Train 0.6618 | Val 0.5661 | LR 0.000000


train 917: 100%|██████████| 58/58 [01:07<00:00,  1.17s/it, av_loss=0.662]
val 917: 100%|██████████| 4/4 [00:07<00:00,  1.89s/it, av_loss=0.567]


Epoch 917 | Train 0.6621 | Val 0.5668 | LR 0.000000


train 918: 100%|██████████| 58/58 [01:07<00:00,  1.17s/it, av_loss=0.663]
val 918: 100%|██████████| 4/4 [00:07<00:00,  1.85s/it, av_loss=0.567]


Epoch 918 | Train 0.6626 | Val 0.5668 | LR 0.000000


train 919: 100%|██████████| 58/58 [01:06<00:00,  1.14s/it, av_loss=0.662]
val 919: 100%|██████████| 4/4 [00:07<00:00,  1.89s/it, av_loss=0.566]


Epoch 919 | Train 0.6619 | Val 0.5665 | LR 0.000000


train 920: 100%|██████████| 58/58 [01:05<00:00,  1.13s/it, av_loss=0.662]
val 920: 100%|██████████| 4/4 [00:07<00:00,  1.78s/it, av_loss=0.567]


Epoch 920 | Train 0.6621 | Val 0.5665 | LR 0.000000


train 921: 100%|██████████| 58/58 [01:06<00:00,  1.15s/it, av_loss=0.662]
val 921: 100%|██████████| 4/4 [00:07<00:00,  1.87s/it, av_loss=0.567]


Epoch 921 | Train 0.6622 | Val 0.5671 | LR 0.000000


train 922: 100%|██████████| 58/58 [01:07<00:00,  1.16s/it, av_loss=0.662]
val 922: 100%|██████████| 4/4 [00:07<00:00,  1.88s/it, av_loss=0.567]


Epoch 922 | Train 0.6621 | Val 0.5667 | LR 0.000000


train 923: 100%|██████████| 58/58 [01:07<00:00,  1.17s/it, av_loss=0.663]
val 923: 100%|██████████| 4/4 [00:07<00:00,  1.85s/it, av_loss=0.566]


Epoch 923 | Train 0.6625 | Val 0.5664 | LR 0.000000


train 924: 100%|██████████| 58/58 [01:06<00:00,  1.15s/it, av_loss=0.662]
val 924: 100%|██████████| 4/4 [00:07<00:00,  1.83s/it, av_loss=0.567]


Epoch 924 | Train 0.6620 | Val 0.5671 | LR 0.000000


train 925: 100%|██████████| 58/58 [01:06<00:00,  1.15s/it, av_loss=0.662]
val 925: 100%|██████████| 4/4 [00:07<00:00,  1.86s/it, av_loss=0.567]


Epoch 925 | Train 0.6615 | Val 0.5667 | LR 0.000000


train 926: 100%|██████████| 58/58 [01:07<00:00,  1.16s/it, av_loss=0.663]
val 926: 100%|██████████| 4/4 [00:07<00:00,  1.88s/it, av_loss=0.567]


Epoch 926 | Train 0.6628 | Val 0.5669 | LR 0.000000


train 927: 100%|██████████| 58/58 [01:08<00:00,  1.17s/it, av_loss=0.664]
val 927: 100%|██████████| 4/4 [00:07<00:00,  1.89s/it, av_loss=0.568]


Epoch 927 | Train 0.6636 | Val 0.5679 | LR 0.000000


train 928: 100%|██████████| 58/58 [01:06<00:00,  1.15s/it, av_loss=0.662]
val 928: 100%|██████████| 4/4 [00:07<00:00,  1.88s/it, av_loss=0.567]


Epoch 928 | Train 0.6618 | Val 0.5665 | LR 0.000000


train 929: 100%|██████████| 58/58 [01:06<00:00,  1.15s/it, av_loss=0.663]
val 929: 100%|██████████| 4/4 [00:07<00:00,  1.84s/it, av_loss=0.567]


Epoch 929 | Train 0.6625 | Val 0.5670 | LR 0.000000


train 930: 100%|██████████| 58/58 [01:06<00:00,  1.15s/it, av_loss=0.661]
val 930: 100%|██████████| 4/4 [00:07<00:00,  1.87s/it, av_loss=0.566]


Epoch 930 | Train 0.6610 | Val 0.5658 | LR 0.000000


train 931: 100%|██████████| 58/58 [01:08<00:00,  1.17s/it, av_loss=0.661]
val 931: 100%|██████████| 4/4 [00:07<00:00,  1.88s/it, av_loss=0.567]


Epoch 931 | Train 0.6614 | Val 0.5672 | LR 0.000000


train 932: 100%|██████████| 58/58 [01:07<00:00,  1.17s/it, av_loss=0.663]
val 932: 100%|██████████| 4/4 [00:07<00:00,  1.84s/it, av_loss=0.567]


Epoch 932 | Train 0.6632 | Val 0.5673 | LR 0.000000


train 933: 100%|██████████| 58/58 [01:06<00:00,  1.15s/it, av_loss=0.663]
val 933: 100%|██████████| 4/4 [00:07<00:00,  1.82s/it, av_loss=0.567]


Epoch 933 | Train 0.6628 | Val 0.5666 | LR 0.000000


train 934: 100%|██████████| 58/58 [01:08<00:00,  1.17s/it, av_loss=0.662]
val 934: 100%|██████████| 4/4 [00:07<00:00,  1.81s/it, av_loss=0.567]


Epoch 934 | Train 0.6624 | Val 0.5669 | LR 0.000000


train 935: 100%|██████████| 58/58 [01:07<00:00,  1.17s/it, av_loss=0.662]
val 935: 100%|██████████| 4/4 [00:07<00:00,  1.87s/it, av_loss=0.567]


Epoch 935 | Train 0.6619 | Val 0.5666 | LR 0.000000


train 936: 100%|██████████| 58/58 [01:07<00:00,  1.16s/it, av_loss=0.662]
val 936: 100%|██████████| 4/4 [00:07<00:00,  1.85s/it, av_loss=0.567]


Epoch 936 | Train 0.6621 | Val 0.5674 | LR 0.000000


train 937: 100%|██████████| 58/58 [01:07<00:00,  1.16s/it, av_loss=0.663]
val 937: 100%|██████████| 4/4 [00:07<00:00,  1.90s/it, av_loss=0.566]


Epoch 937 | Train 0.6630 | Val 0.5660 | LR 0.000000


train 938: 100%|██████████| 58/58 [01:07<00:00,  1.16s/it, av_loss=0.662]
val 938: 100%|██████████| 4/4 [00:07<00:00,  1.86s/it, av_loss=0.566]


Epoch 938 | Train 0.6623 | Val 0.5660 | LR 0.000000


train 939: 100%|██████████| 58/58 [01:07<00:00,  1.16s/it, av_loss=0.663]
val 939: 100%|██████████| 4/4 [00:07<00:00,  1.83s/it, av_loss=0.567]


Epoch 939 | Train 0.6626 | Val 0.5671 | LR 0.000000


train 940: 100%|██████████| 58/58 [01:06<00:00,  1.14s/it, av_loss=0.663]
val 940: 100%|██████████| 4/4 [00:07<00:00,  1.88s/it, av_loss=0.567]


Epoch 940 | Train 0.6628 | Val 0.5667 | LR 0.000000


train 941: 100%|██████████| 58/58 [01:06<00:00,  1.14s/it, av_loss=0.662]
val 941: 100%|██████████| 4/4 [00:07<00:00,  1.81s/it, av_loss=0.567]


Epoch 941 | Train 0.6621 | Val 0.5669 | LR 0.000000


train 942: 100%|██████████| 58/58 [01:06<00:00,  1.15s/it, av_loss=0.663]
val 942: 100%|██████████| 4/4 [00:07<00:00,  1.89s/it, av_loss=0.567]


Epoch 942 | Train 0.6626 | Val 0.5669 | LR 0.000000


train 943: 100%|██████████| 58/58 [01:07<00:00,  1.16s/it, av_loss=0.661]
val 943: 100%|██████████| 4/4 [00:07<00:00,  1.88s/it, av_loss=0.567]


Epoch 943 | Train 0.6614 | Val 0.5668 | LR 0.000000


train 944: 100%|██████████| 58/58 [01:08<00:00,  1.17s/it, av_loss=0.663]
val 944: 100%|██████████| 4/4 [00:07<00:00,  1.87s/it, av_loss=0.566]


Epoch 944 | Train 0.6629 | Val 0.5663 | LR 0.000000


train 945: 100%|██████████| 58/58 [01:07<00:00,  1.17s/it, av_loss=0.663]
val 945: 100%|██████████| 4/4 [00:07<00:00,  1.85s/it, av_loss=0.568]


Epoch 945 | Train 0.6626 | Val 0.5677 | LR 0.000000


train 946: 100%|██████████| 58/58 [01:07<00:00,  1.16s/it, av_loss=0.663]
val 946: 100%|██████████| 4/4 [00:07<00:00,  1.86s/it, av_loss=0.568]


Epoch 946 | Train 0.6626 | Val 0.5675 | LR 0.000000


train 947: 100%|██████████| 58/58 [01:07<00:00,  1.16s/it, av_loss=0.663]
val 947: 100%|██████████| 4/4 [00:07<00:00,  1.82s/it, av_loss=0.567]


Epoch 947 | Train 0.6627 | Val 0.5675 | LR 0.000000


train 948: 100%|██████████| 58/58 [01:06<00:00,  1.15s/it, av_loss=0.662]
val 948: 100%|██████████| 4/4 [00:07<00:00,  1.91s/it, av_loss=0.566]


Epoch 948 | Train 0.6623 | Val 0.5657 | LR 0.000000


train 949: 100%|██████████| 58/58 [01:07<00:00,  1.16s/it, av_loss=0.662]
val 949: 100%|██████████| 4/4 [00:07<00:00,  1.83s/it, av_loss=0.567]


Epoch 949 | Train 0.6623 | Val 0.5667 | LR 0.000000


train 950: 100%|██████████| 58/58 [01:06<00:00,  1.14s/it, av_loss=0.663]
val 950: 100%|██████████| 4/4 [00:07<00:00,  1.79s/it, av_loss=0.566]


Epoch 950 | Train 0.6627 | Val 0.5658 | LR 0.000000


train 951: 100%|██████████| 58/58 [01:06<00:00,  1.15s/it, av_loss=0.663]
val 951: 100%|██████████| 4/4 [00:07<00:00,  1.91s/it, av_loss=0.567]


Epoch 951 | Train 0.6633 | Val 0.5673 | LR 0.000000


train 952: 100%|██████████| 58/58 [01:07<00:00,  1.16s/it, av_loss=0.662]
val 952: 100%|██████████| 4/4 [00:07<00:00,  1.92s/it, av_loss=0.567]


Epoch 952 | Train 0.6622 | Val 0.5672 | LR 0.000000


train 953: 100%|██████████| 58/58 [01:06<00:00,  1.15s/it, av_loss=0.663]
val 953: 100%|██████████| 4/4 [00:07<00:00,  1.87s/it, av_loss=0.568]


Epoch 953 | Train 0.6631 | Val 0.5682 | LR 0.000000


train 954: 100%|██████████| 58/58 [01:06<00:00,  1.14s/it, av_loss=0.661]
val 954: 100%|██████████| 4/4 [00:07<00:00,  1.94s/it, av_loss=0.567]


Epoch 954 | Train 0.6615 | Val 0.5668 | LR 0.000000


train 955: 100%|██████████| 58/58 [01:07<00:00,  1.16s/it, av_loss=0.662]
val 955: 100%|██████████| 4/4 [00:07<00:00,  1.90s/it, av_loss=0.567]


Epoch 955 | Train 0.6622 | Val 0.5665 | LR 0.000000


train 956: 100%|██████████| 58/58 [01:07<00:00,  1.17s/it, av_loss=0.661]
val 956: 100%|██████████| 4/4 [00:07<00:00,  1.86s/it, av_loss=0.567]


Epoch 956 | Train 0.6613 | Val 0.5674 | LR 0.000000


train 957: 100%|██████████| 58/58 [01:06<00:00,  1.15s/it, av_loss=0.662]
val 957: 100%|██████████| 4/4 [00:07<00:00,  1.89s/it, av_loss=0.567]


Epoch 957 | Train 0.6617 | Val 0.5668 | LR 0.000000


train 958: 100%|██████████| 58/58 [01:07<00:00,  1.16s/it, av_loss=0.662]
val 958: 100%|██████████| 4/4 [00:07<00:00,  1.82s/it, av_loss=0.567]


Epoch 958 | Train 0.6620 | Val 0.5674 | LR 0.000000


train 959: 100%|██████████| 58/58 [01:07<00:00,  1.16s/it, av_loss=0.662]
val 959: 100%|██████████| 4/4 [00:07<00:00,  1.81s/it, av_loss=0.567]


Epoch 959 | Train 0.6615 | Val 0.5670 | LR 0.000000


train 960: 100%|██████████| 58/58 [01:07<00:00,  1.16s/it, av_loss=0.663]
val 960: 100%|██████████| 4/4 [00:07<00:00,  1.87s/it, av_loss=0.566]


Epoch 960 | Train 0.6625 | Val 0.5656 | LR 0.000000


train 961: 100%|██████████| 58/58 [01:07<00:00,  1.16s/it, av_loss=0.662]
val 961: 100%|██████████| 4/4 [00:07<00:00,  1.89s/it, av_loss=0.567]


Epoch 961 | Train 0.6620 | Val 0.5668 | LR 0.000000


train 962: 100%|██████████| 58/58 [01:06<00:00,  1.16s/it, av_loss=0.662]
val 962: 100%|██████████| 4/4 [00:07<00:00,  1.83s/it, av_loss=0.566]


Epoch 962 | Train 0.6620 | Val 0.5662 | LR 0.000000


train 963: 100%|██████████| 58/58 [01:06<00:00,  1.15s/it, av_loss=0.662]
val 963: 100%|██████████| 4/4 [00:07<00:00,  1.81s/it, av_loss=0.567]


Epoch 963 | Train 0.6618 | Val 0.5675 | LR 0.000000


train 964: 100%|██████████| 58/58 [01:05<00:00,  1.13s/it, av_loss=0.664]
val 964: 100%|██████████| 4/4 [00:07<00:00,  1.87s/it, av_loss=0.567]


Epoch 964 | Train 0.6637 | Val 0.5674 | LR 0.000000


train 965: 100%|██████████| 58/58 [01:07<00:00,  1.16s/it, av_loss=0.662]
val 965: 100%|██████████| 4/4 [00:07<00:00,  1.87s/it, av_loss=0.567]


Epoch 965 | Train 0.6624 | Val 0.5668 | LR 0.000000


train 966: 100%|██████████| 58/58 [01:08<00:00,  1.17s/it, av_loss=0.662]
val 966: 100%|██████████| 4/4 [00:07<00:00,  1.88s/it, av_loss=0.567]


Epoch 966 | Train 0.6617 | Val 0.5667 | LR 0.000000


train 967: 100%|██████████| 58/58 [01:08<00:00,  1.17s/it, av_loss=0.663]
val 967: 100%|██████████| 4/4 [00:07<00:00,  1.89s/it, av_loss=0.567]


Epoch 967 | Train 0.6626 | Val 0.5668 | LR 0.000000


train 968: 100%|██████████| 58/58 [01:05<00:00,  1.14s/it, av_loss=0.661]
val 968: 100%|██████████| 4/4 [00:07<00:00,  1.80s/it, av_loss=0.565]


Epoch 968 | Train 0.6612 | Val 0.5654 | LR 0.000000


train 969: 100%|██████████| 58/58 [01:06<00:00,  1.15s/it, av_loss=0.661]
val 969: 100%|██████████| 4/4 [00:07<00:00,  1.90s/it, av_loss=0.566]


Epoch 969 | Train 0.6613 | Val 0.5659 | LR 0.000000


train 970: 100%|██████████| 58/58 [01:08<00:00,  1.17s/it, av_loss=0.661]
val 970: 100%|██████████| 4/4 [00:07<00:00,  1.89s/it, av_loss=0.566]


Epoch 970 | Train 0.6611 | Val 0.5661 | LR 0.000000


train 971: 100%|██████████| 58/58 [01:06<00:00,  1.15s/it, av_loss=0.661]
val 971: 100%|██████████| 4/4 [00:07<00:00,  1.87s/it, av_loss=0.567]


Epoch 971 | Train 0.6614 | Val 0.5667 | LR 0.000000


train 972: 100%|██████████| 58/58 [01:07<00:00,  1.16s/it, av_loss=0.661]
val 972: 100%|██████████| 4/4 [00:07<00:00,  1.88s/it, av_loss=0.568]


Epoch 972 | Train 0.6610 | Val 0.5683 | LR 0.000000


train 973: 100%|██████████| 58/58 [01:07<00:00,  1.16s/it, av_loss=0.662]
val 973: 100%|██████████| 4/4 [00:07<00:00,  1.79s/it, av_loss=0.567]


Epoch 973 | Train 0.6616 | Val 0.5665 | LR 0.000000


train 974: 100%|██████████| 58/58 [01:05<00:00,  1.14s/it, av_loss=0.663]
val 974: 100%|██████████| 4/4 [00:07<00:00,  1.84s/it, av_loss=0.568]


Epoch 974 | Train 0.6629 | Val 0.5677 | LR 0.000000


train 975: 100%|██████████| 58/58 [01:07<00:00,  1.16s/it, av_loss=0.662]
val 975: 100%|██████████| 4/4 [00:07<00:00,  1.92s/it, av_loss=0.567]


Epoch 975 | Train 0.6616 | Val 0.5669 | LR 0.000000


train 976: 100%|██████████| 58/58 [01:07<00:00,  1.16s/it, av_loss=0.663]
val 976: 100%|██████████| 4/4 [00:07<00:00,  1.87s/it, av_loss=0.567]


Epoch 976 | Train 0.6631 | Val 0.5670 | LR 0.000000


train 977: 100%|██████████| 58/58 [01:06<00:00,  1.15s/it, av_loss=0.662]
val 977: 100%|██████████| 4/4 [00:07<00:00,  1.84s/it, av_loss=0.567]


Epoch 977 | Train 0.6623 | Val 0.5672 | LR 0.000000


train 978: 100%|██████████| 58/58 [01:06<00:00,  1.15s/it, av_loss=0.663]
val 978: 100%|██████████| 4/4 [00:07<00:00,  1.84s/it, av_loss=0.568]


Epoch 978 | Train 0.6630 | Val 0.5678 | LR 0.000000


train 979: 100%|██████████| 58/58 [01:08<00:00,  1.17s/it, av_loss=0.662]
val 979: 100%|██████████| 4/4 [00:07<00:00,  1.84s/it, av_loss=0.567]


Epoch 979 | Train 0.6624 | Val 0.5669 | LR 0.000000


train 980: 100%|██████████| 58/58 [01:06<00:00,  1.15s/it, av_loss=0.661]
val 980: 100%|██████████| 4/4 [00:07<00:00,  1.83s/it, av_loss=0.566]


Epoch 980 | Train 0.6612 | Val 0.5662 | LR 0.000000


train 981: 100%|██████████| 58/58 [01:09<00:00,  1.19s/it, av_loss=0.662]
val 981: 100%|██████████| 4/4 [00:07<00:00,  1.88s/it, av_loss=0.566]


Epoch 981 | Train 0.6624 | Val 0.5665 | LR 0.000000


train 982: 100%|██████████| 58/58 [01:08<00:00,  1.18s/it, av_loss=0.662]
val 982: 100%|██████████| 4/4 [00:07<00:00,  1.92s/it, av_loss=0.567]


Epoch 982 | Train 0.6618 | Val 0.5665 | LR 0.000000


train 983: 100%|██████████| 58/58 [01:07<00:00,  1.17s/it, av_loss=0.662]
val 983: 100%|██████████| 4/4 [00:07<00:00,  1.87s/it, av_loss=0.566]


Epoch 983 | Train 0.6620 | Val 0.5655 | LR 0.000000


train 984: 100%|██████████| 58/58 [01:07<00:00,  1.16s/it, av_loss=0.662]
val 984: 100%|██████████| 4/4 [00:07<00:00,  1.83s/it, av_loss=0.567]


Epoch 984 | Train 0.6619 | Val 0.5672 | LR 0.000000


train 985: 100%|██████████| 58/58 [01:08<00:00,  1.18s/it, av_loss=0.662]
val 985: 100%|██████████| 4/4 [00:07<00:00,  1.86s/it, av_loss=0.567]


Epoch 985 | Train 0.6624 | Val 0.5669 | LR 0.000000


train 986: 100%|██████████| 58/58 [01:08<00:00,  1.17s/it, av_loss=0.663]
val 986: 100%|██████████| 4/4 [00:07<00:00,  1.95s/it, av_loss=0.566]


Epoch 986 | Train 0.6628 | Val 0.5663 | LR 0.000000


train 987: 100%|██████████| 58/58 [01:08<00:00,  1.18s/it, av_loss=0.662]
val 987: 100%|██████████| 4/4 [00:07<00:00,  1.83s/it, av_loss=0.566]


Epoch 987 | Train 0.6624 | Val 0.5662 | LR 0.000000


train 988: 100%|██████████| 58/58 [01:07<00:00,  1.16s/it, av_loss=0.662]
val 988: 100%|██████████| 4/4 [00:07<00:00,  1.89s/it, av_loss=0.567]


Epoch 988 | Train 0.6622 | Val 0.5668 | LR 0.000000


train 989: 100%|██████████| 58/58 [01:08<00:00,  1.18s/it, av_loss=0.662]
val 989: 100%|██████████| 4/4 [00:07<00:00,  1.89s/it, av_loss=0.567]


Epoch 989 | Train 0.6622 | Val 0.5667 | LR 0.000000


train 990: 100%|██████████| 58/58 [01:07<00:00,  1.16s/it, av_loss=0.662]
val 990: 100%|██████████| 4/4 [00:07<00:00,  1.82s/it, av_loss=0.567]


Epoch 990 | Train 0.6618 | Val 0.5667 | LR 0.000000


train 991: 100%|██████████| 58/58 [01:07<00:00,  1.17s/it, av_loss=0.662]
val 991: 100%|██████████| 4/4 [00:07<00:00,  1.88s/it, av_loss=0.567]


Epoch 991 | Train 0.6617 | Val 0.5668 | LR 0.000000


train 992: 100%|██████████| 58/58 [01:07<00:00,  1.17s/it, av_loss=0.663]
val 992: 100%|██████████| 4/4 [00:07<00:00,  1.90s/it, av_loss=0.568]


Epoch 992 | Train 0.6625 | Val 0.5683 | LR 0.000000


train 993: 100%|██████████| 58/58 [01:08<00:00,  1.17s/it, av_loss=0.662]
val 993: 100%|██████████| 4/4 [00:07<00:00,  1.82s/it, av_loss=0.567]


Epoch 993 | Train 0.6624 | Val 0.5671 | LR 0.000000


train 994: 100%|██████████| 58/58 [01:06<00:00,  1.15s/it, av_loss=0.663]
val 994: 100%|██████████| 4/4 [00:07<00:00,  1.86s/it, av_loss=0.567]


Epoch 994 | Train 0.6631 | Val 0.5669 | LR 0.000000


train 995: 100%|██████████| 58/58 [01:07<00:00,  1.16s/it, av_loss=0.662]
val 995: 100%|██████████| 4/4 [00:07<00:00,  1.89s/it, av_loss=0.567]


Epoch 995 | Train 0.6624 | Val 0.5670 | LR 0.000000


train 996: 100%|██████████| 58/58 [01:07<00:00,  1.17s/it, av_loss=0.661]
val 996: 100%|██████████| 4/4 [00:07<00:00,  1.91s/it, av_loss=0.567]


Epoch 996 | Train 0.6612 | Val 0.5667 | LR 0.000000


train 997: 100%|██████████| 58/58 [01:07<00:00,  1.17s/it, av_loss=0.662]
val 997: 100%|██████████| 4/4 [00:07<00:00,  1.86s/it, av_loss=0.567]


Epoch 997 | Train 0.6622 | Val 0.5670 | LR 0.000000


train 998: 100%|██████████| 58/58 [01:07<00:00,  1.16s/it, av_loss=0.663]
val 998: 100%|██████████| 4/4 [00:07<00:00,  1.86s/it, av_loss=0.567]


Epoch 998 | Train 0.6629 | Val 0.5666 | LR 0.000000


train 999: 100%|██████████| 58/58 [01:07<00:00,  1.17s/it, av_loss=0.663]
val 999: 100%|██████████| 4/4 [00:07<00:00,  1.89s/it, av_loss=0.565]

Epoch 999 | Train 0.6628 | Val 0.5653 | LR 0.000000





In [15]:
# Ad-hoc inspection: as the last expression of the cell, this tuple is echoed
# by the notebook so both tensors can be eyeballed side by side.
# NOTE(review): neither `outputs` nor `targets` is assigned in this cell; they
# come from kernel state set by an earlier cell (presumably the final batch of
# the training/validation loop above — confirm). Re-running this cell on a
# fresh kernel without that earlier cell will raise NameError.
outputs, targets

(tensor([[ 2.2256,  0.2028,  5.3070,  ...,  0.0000,  0.0162,  0.4127],
         [ 1.8853, -0.0358,  0.3338,  ...,  1.9942,  0.0000,  1.9039],
         [ 0.0000,  0.0000,  0.4560,  ...,  1.0192,  0.6509,  1.7127],
         ...,
         [ 3.7650,  0.0000, -0.1808,  ...,  5.6089,  3.4821,  0.4775],
         [-0.6968, -0.1935,  1.1592,  ...,  0.0000,  0.2794,  2.3095],
         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  1.0781,  6.7716]],
        device='cuda:0', grad_fn=<IndexPutBackward0>),
 tensor([2, 5, 3,  ..., 5, 7, 4], device='cuda:0'))