In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
import numpy as np
from scipy.optimize import linear_sum_assignment
import torch
import torch.nn as nn
import torch.nn.functional as F
from tqdm import tqdm
from collections import Counter

from helper_funcs import generate_sequences


In [2]:
import wandb
run = wandb.init()
artifact = run.use_artifact('matteopeluso1922/cdcd-hmp-param-search-orion_truewarp/best_model_aiicxkad:v0', type='model')
artifact_dir = artifact.download()

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33mmatteopeluso1922[0m. Use [1m`wandb login --relogin`[0m to force relogin


[34m[1mwandb[0m:   1 of 1 files downloaded.  


In [3]:
# Load data
loaded_df = pd.read_hdf('./data/sample_otu_arrays.h5', key='df')

# Set random seed
np.random.seed(42)

# Split indices into train/test
train_idx, test_idx = train_test_split(loaded_df.index, test_size=0.2, random_state=42)

# Create train and test dataframes
train_df = loaded_df.loc[train_idx]
test_df = loaded_df.loc[test_idx]

print(f"Train size: {len(train_df)}")
print(f"Test size: {len(test_df)}")
print("\nFirst few training samples:")
print(train_df.head())

# Let's also look at array lengths
array_lengths = [len(x) for x in loaded_df['otu_arrays']]
print(f"\nMin array length: {min(array_lengths)}")
print(f"Max array length: {max(array_lengths)}")
print(f"Mean array length: {np.mean(array_lengths):.2f}")

Train size: 6486
Test size: 1622

First few training samples:
                                                            otu_arrays
Unnamed: 0                                                            
SRR044975.SRS011167  [30, 58, 82, 89, 93, 98, 99, 104, 117, 120, 12...
SRR049604.SRS049164  [9, 10, 11, 14, 15, 16, 17, 20, 28, 30, 31, 32...
SRR331714.SRS076947  [19, 30, 43, 58, 65, 70, 71, 74, 80, 90, 92, 9...
SRR089999.SRS077685  [12, 14, 18, 20, 22, 38, 45, 67, 68, 76, 88, 1...
SRR048091.SRS021563  [19, 30, 45, 52, 58, 60, 65, 70, 74, 80, 90, 9...

Min array length: 3
Max array length: 277
Mean array length: 69.10


In [4]:
import torch
from torch.utils.data import Dataset, DataLoader

class OTUDataset(Dataset):
   def __init__(self, df):
       self.df = df
       
       # Find max sequence length for padding
       self.max_len = max(len(x) for x in df['otu_arrays'])
       
   def __len__(self):
       return len(self.df)
   
   def __getitem__(self, idx):
       # Get array for this sample
       array = self.df.iloc[idx]['otu_arrays']
       
       # Create padded tensor
       padded = torch.zeros(self.max_len, dtype=torch.long)
       padded[:len(array)] = torch.tensor(array)
       
       # Create mask (False where we have real tokens, True for padding)
       mask = torch.zeros(self.max_len, dtype=torch.bool)
       mask[len(array):] = True
       
       return padded, mask

# Create datasets
train_dataset = OTUDataset(train_df)
test_dataset = OTUDataset(test_df)

# Create dataloaders
batch_size = 8
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# Verify shapes
for tokens, mask in train_loader:
   print(f"Batch tokens shape: {tokens.shape}")
   print(f"Batch mask shape: {mask.shape}")

   break

# Get vocab size (maximum token ID + 1 for padding)
vocab_size = max(max(x) for x in loaded_df['otu_arrays']) + 1
print(f"\nVocabulary size: {vocab_size}")

Batch tokens shape: torch.Size([8, 277])
Batch mask shape: torch.Size([8, 277])

Vocabulary size: 519


In [5]:
import model_arch
import helper_funcs
import importlib
from model_arch import CategoricalScoreDiffusion
from helper_funcs import generate_sequences
importlib.reload(model_arch)
importlib.reload(helper_funcs)

<module 'helper_funcs' from '/mnt/mnemo9/mpelus/matlas/cdcd_multi_train/cdcd_hmp/helper_funcs.py'>

In [6]:

class TrainingMetrics:
    def __init__(self):
        self.best_val_loss = float('inf')

        
    def update_best_metrics(self, val_loss):
        improved = False
        if val_loss < self.best_val_loss:
            self.best_val_loss = val_loss
            improved = True
        return improved

def train_step(model, tokens, mask, optimizer, device):
    optimizer.zero_grad()
    
    # Sample time using warping
    t = model.sample_time(tokens.shape[0], tokens.device)

    # Get clean embeddings
    x0 = model.embedding(tokens)
  
    
    # Add noise
    noise = model.get_noise(x0, t)

    xt = x0 + noise

    
    # Get model predictions
    logits = model(xt, mask, t)

    
    # Compute loss
    loss = F.cross_entropy(
        logits.view(-1, logits.size(-1)),
        tokens.view(-1),
        ignore_index=0
    )

    if not torch.isnan(loss):
        model.update_time_warping(t, loss.detach())
        loss.backward()
        optimizer.step()
    
    return loss.item()

def validation_step(model, tokens, mask, device):
    # Sample time using warping
    t = model.sample_time(tokens.shape[0], tokens.device)
    
    # Get clean embeddings
    x0 = model.embedding(tokens)
    
    # Add noise according to N(0, σt²)
    noise = model.get_noise(x0, t)
    xt = x0 + noise
    
    # Get model predictions
    logits = model(xt, mask, t)
    
    # Compute cross-entropy loss with padding handling
    loss = F.cross_entropy(
        logits.view(-1, logits.size(-1)),
        tokens.view(-1),
        ignore_index=0  # Assuming 0 is padding token
    )
    
    return loss.item()

def save_checkpoint(model, optimizer, scheduler, epoch, train_loss, val_loss):
    checkpoint = {
        'epoch': epoch,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'scheduler_state_dict': scheduler.state_dict() if scheduler else None,
        'train_loss': train_loss,
        'val_loss': val_loss,
        
    }
    torch.save(checkpoint, 'best_model.pt')

def log_metrics(metrics_dict, step_type='batch'):
    wandb.log(metrics_dict)

def train_epoch(model, train_loader, optimizer, device, epoch):
    model.train()
    train_loss = 0
    train_bar = tqdm(train_loader, desc=f'Training Epoch {epoch}')
    
    for batch_idx, (tokens, mask) in enumerate(train_bar):
        tokens = tokens.to(device)
        mask = mask.to(device)
        
        loss = train_step(model, tokens, mask, optimizer, device)
        train_loss += loss
        
        train_bar.set_postfix({'loss': f'{loss:.4f}'})
        log_metrics({
            'train/batch_loss': loss,
            'train/learning_rate': optimizer.param_groups[0]['lr'],
            'epoch': epoch,
            'batch': batch_idx
        })
    
    return train_loss / len(train_loader)

def validate_epoch(model, test_loader, device, epoch):
    model.eval()
    val_loss = 0
    val_bar = tqdm(test_loader, desc=f'Validation Epoch {epoch}')
    
    # Collect real sequences
    real_sequences = []
    with torch.no_grad():
        for tokens, mask in val_bar:
            tokens = tokens.to(device)
            mask = mask.to(device)
            
            loss = validation_step(model, tokens, mask, device)
            val_loss += loss
            val_bar.set_postfix({'loss': f'{loss:.4f}'})
            
            real_sequences.extend([seq[seq != 0].cpu().numpy() for seq in tokens])

    
    return val_loss / len(test_loader)



def train_and_validate(model, train_loader, test_loader, optimizer, num_epochs, device, use_lr_scheduling=True):
    metrics = TrainingMetrics()
    
    scheduler = None
    if use_lr_scheduling:
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            optimizer, mode='min', patience=3, factor=0.5, verbose=True
        )
    
    for epoch in range(num_epochs):
        # Training phase
        avg_train_loss = train_epoch(model, train_loader, optimizer, device, epoch)
        log_metrics({'train/epoch_loss': avg_train_loss, 'epoch': epoch})
         
        # Validation phase (every 5 epochs)
        if epoch % 1 == 0:
            avg_val_loss = validate_epoch(model, test_loader, device, epoch)
            
            log_metrics({
                'val/epoch_loss': avg_val_loss,
                'epoch': epoch
            })
            
            print(f'\nEpoch {epoch}:')
            print(f'Average Train Loss: {avg_train_loss:.4f}')
            print(f'Average Val Loss: {avg_val_loss:.4f}')
         
            
            if scheduler:
                scheduler.step(avg_val_loss)
            
            if metrics.update_best_metrics(avg_val_loss):
                save_checkpoint(model, optimizer, scheduler, epoch, avg_train_loss, avg_val_loss)
                log_metrics({
                    'best_model/val_loss': avg_val_loss,
                    'best_model/train_loss': avg_train_loss,
                    'best_model/epoch': epoch
                })
        else:
            print(f'\nEpoch {epoch}: Average Train Loss: {avg_train_loss:.4f}\n')


def train_step(model, tokens, mask, optimizer, device):
    optimizer.zero_grad()
    
    t = model.sample_time(tokens.shape[0], tokens.device)
    
    # Get bin assignments and importance weights
    bin_idx = model.time_warping.get_bin_assignment(t)
    importance_weights = model.time_warping.get_importance_weights(bin_idx)
    
    x0 = model.embedding(tokens)
    noise = model.get_noise(x0, t)
    xt = x0 + noise
    logits = model(xt, mask, t)
    
    loss = F.cross_entropy(
        logits.view(-1, logits.size(-1)),
        tokens.view(-1),
        ignore_index=0
    )

    if not torch.isnan(loss):
        # Collect statistics for time warping
        model.time_warping.collect_statistics(t, loss.detach().expand(tokens.shape[0]))
        
        # Apply importance weights to loss
        weighted_loss = loss * importance_weights.mean()
        weighted_loss.backward()
        optimizer.step()
    
    return loss.item()

def train_epoch(model, train_loader, optimizer, device, epoch):
    model.train()
    train_loss = 0
    train_bar = tqdm(train_loader, desc=f'Training Epoch {epoch}')
    
    for batch_idx, (tokens, mask) in enumerate(train_bar):
        tokens = tokens.to(device)
        mask = mask.to(device)
        
        loss = train_step(model, tokens, mask, optimizer, device)
        train_loss += loss
        train_bar.set_postfix({'loss': f'{loss:.4f}'})
        
        log_metrics({
            'train/batch_loss': loss,
            'train/learning_rate': optimizer.param_groups[0]['lr'],
            'epoch': epoch,
            'batch': batch_idx
        })
    
    # Update time warping at end of epoch using accumulated statistics
    model.time_warping.update_warping()
    
    return train_loss / len(train_loader)


In [7]:
checkpoint = torch.load(f"/mnt/mnemo9/mpelus/matlas/cdcd_multi_train/cdcd_hmp/artifacts/best_model_aiicxkad:v0/tmp21qdlph1.pt")
# Initialize model with the same parameters you showed
embed_dim = 32
num_layers = 3
num_heads = 8
dim_feedforward = 32
num_fourier_features = 16

model = CategoricalScoreDiffusion(
    vocab_size=vocab_size,
    embed_dim=embed_dim,
    num_layers=num_layers,
    num_heads=num_heads,
    dim_feedforward=dim_feedforward,
    num_fourier_features=num_fourier_features
)


model.load_state_dict(checkpoint)

device = 'cuda' if torch.cuda.is_available() else 'cpu'
# Move model to device
model = model.to(device)

  checkpoint = torch.load(f"/mnt/mnemo9/mpelus/matlas/cdcd_multi_train/cdcd_hmp/artifacts/best_model_aiicxkad:v0/tmp21qdlph1.pt")


RuntimeError: Error(s) in loading state_dict for CategoricalScoreDiffusion:
	Unexpected key(s) in state_dict: "transformer.layers.1.self_attn.in_proj_weight", "transformer.layers.1.self_attn.in_proj_bias", "transformer.layers.1.self_attn.out_proj.weight", "transformer.layers.1.self_attn.out_proj.bias", "transformer.layers.1.linear1.weight", "transformer.layers.1.linear1.bias", "transformer.layers.1.linear2.weight", "transformer.layers.1.linear2.bias", "transformer.layers.1.norm1.weight", "transformer.layers.1.norm1.bias", "transformer.layers.1.norm2.weight", "transformer.layers.1.norm2.bias", "transformer.layers.2.self_attn.in_proj_weight", "transformer.layers.2.self_attn.in_proj_bias", "transformer.layers.2.self_attn.out_proj.weight", "transformer.layers.2.self_attn.out_proj.bias", "transformer.layers.2.linear1.weight", "transformer.layers.2.linear1.bias", "transformer.layers.2.linear2.weight", "transformer.layers.2.linear2.bias", "transformer.layers.2.norm1.weight", "transformer.layers.2.norm1.bias", "transformer.layers.2.norm2.weight", "transformer.layers.2.norm2.bias", "transformer.layers.3.self_attn.in_proj_weight", "transformer.layers.3.self_attn.in_proj_bias", "transformer.layers.3.self_attn.out_proj.weight", "transformer.layers.3.self_attn.out_proj.bias", "transformer.layers.3.linear1.weight", "transformer.layers.3.linear1.bias", "transformer.layers.3.linear2.weight", "transformer.layers.3.linear2.bias", "transformer.layers.3.norm1.weight", "transformer.layers.3.norm1.bias", "transformer.layers.3.norm2.weight", "transformer.layers.3.norm2.bias". 
	size mismatch for random_matrix: copying a param with shape torch.Size([1, 4]) from checkpoint, the shape in current model is torch.Size([1, 16]).
	size mismatch for embedding.embedding.weight: copying a param with shape torch.Size([519, 96]) from checkpoint, the shape in current model is torch.Size([519, 160]).
	size mismatch for transformer.layers.0.self_attn.in_proj_weight: copying a param with shape torch.Size([288, 96]) from checkpoint, the shape in current model is torch.Size([480, 160]).
	size mismatch for transformer.layers.0.self_attn.in_proj_bias: copying a param with shape torch.Size([288]) from checkpoint, the shape in current model is torch.Size([480]).
	size mismatch for transformer.layers.0.self_attn.out_proj.weight: copying a param with shape torch.Size([96, 96]) from checkpoint, the shape in current model is torch.Size([160, 160]).
	size mismatch for transformer.layers.0.self_attn.out_proj.bias: copying a param with shape torch.Size([96]) from checkpoint, the shape in current model is torch.Size([160]).
	size mismatch for transformer.layers.0.linear1.weight: copying a param with shape torch.Size([28, 96]) from checkpoint, the shape in current model is torch.Size([32, 160]).
	size mismatch for transformer.layers.0.linear1.bias: copying a param with shape torch.Size([28]) from checkpoint, the shape in current model is torch.Size([32]).
	size mismatch for transformer.layers.0.linear2.weight: copying a param with shape torch.Size([96, 28]) from checkpoint, the shape in current model is torch.Size([160, 32]).
	size mismatch for transformer.layers.0.linear2.bias: copying a param with shape torch.Size([96]) from checkpoint, the shape in current model is torch.Size([160]).
	size mismatch for transformer.layers.0.norm1.weight: copying a param with shape torch.Size([96]) from checkpoint, the shape in current model is torch.Size([160]).
	size mismatch for transformer.layers.0.norm1.bias: copying a param with shape torch.Size([96]) from checkpoint, the shape in current model is torch.Size([160]).
	size mismatch for transformer.layers.0.norm2.weight: copying a param with shape torch.Size([96]) from checkpoint, the shape in current model is torch.Size([160]).
	size mismatch for transformer.layers.0.norm2.bias: copying a param with shape torch.Size([96]) from checkpoint, the shape in current model is torch.Size([160]).
	size mismatch for to_logits.weight: copying a param with shape torch.Size([519, 96]) from checkpoint, the shape in current model is torch.Size([519, 160]).
	size mismatch for time_mlp.0.weight: copying a param with shape torch.Size([96, 8]) from checkpoint, the shape in current model is torch.Size([160, 32]).
	size mismatch for time_mlp.0.bias: copying a param with shape torch.Size([96]) from checkpoint, the shape in current model is torch.Size([160]).
	size mismatch for time_mlp.2.weight: copying a param with shape torch.Size([96, 96]) from checkpoint, the shape in current model is torch.Size([160, 160]).
	size mismatch for time_mlp.2.bias: copying a param with shape torch.Size([96]) from checkpoint, the shape in current model is torch.Size([160]).

In [16]:
# Initialize model
embed_dim = 32
num_layers = 3
num_heads = 8
dim_feedforward = 32
num_fourier_features = 8# going from 4 to 8 destabilised the batch loss but seems o have resulted in a faster convergence and lower
model = CategoricalScoreDiffusion(
    vocab_size=vocab_size,
    embed_dim=embed_dim,
    num_layers=num_layers,
    num_heads=num_heads,
    dim_feedforward=dim_feedforward,
    num_fourier_features=num_fourier_features
    
)

device = 'cuda' if torch.cuda.is_available() else 'cpu'
# Move model to device
model = model.to(device)


In [17]:
import wandb
num_epochs = 200
learning_rate = 1e-3

wandb.finish()
wandb.init(
    project="diffusion-hmp",
    config={
        "learning_rate": learning_rate,
        "architecture": "restart",
        "dataset": "hmp",
        "epochs": num_epochs,
        "embed_dim": embed_dim,
        "num_layers": num_layers,
        "num_heads": num_heads,
        "dim_feedforward": dim_feedforward,
        "vocab_size": vocab_size,
        "num_fourier_features":num_fourier_features
    }
)

0,1
batch,█▅▅▆▂▇▇▄▇▃█▅▆▂▃▄▅▃▅▇▂▅▇▅▄▃▅▆▁▅▆▂▇▅▄▄▃▆▁▄
best_model/epoch,▁▁▁▂▂▃██
best_model/train_loss,▇█▆▄▂▂▁▂
best_model/val_loss,█▅▄▄▃▂▂▁
epoch,▁▁▁▁▁▂▂▂▂▂▂▃▃▃▃▄▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇█
train/batch_loss,▄▄▅▆▂▇▅▄▅▆▇▃▆▆▆▇▇▆▃▄▅█▆▇██▆▃▃▃▁▇▂▆▇▃▇▅▆▆
train/epoch_loss,█▆▆▅▅▅▄▂▄▅▃▄▃▂▄▃▄▄▃▃▂▃▂▃▃▂▂▄▃▂▃▂▃▃▁▄▃▁▂▂
train/learning_rate,████▄▃▃▃▃▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val/epoch_loss,▇▅▆▅▇▇▆▅▆▆▄▆▅▆▃▇▅▃▆▇▂▃▃▄▄▄█▅▂█▄▄▄▄▃▂▅▇▁▄

0,1
batch,373.0
best_model/epoch,73.0
best_model/train_loss,3.86045
best_model/val_loss,3.75278
epoch,78.0
train/batch_loss,3.749
train/epoch_loss,3.85698
train/learning_rate,0.0
val/epoch_loss,3.82535


In [18]:
# Training parameters
# Initialize optimizer
optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate)
# Start training
train_and_validate(model, train_loader, test_loader, optimizer, num_epochs, device)

Training Epoch 0: 100%|██████████| 811/811 [00:09<00:00, 85.16it/s, loss=4.0057]
Validation Epoch 0: 100%|██████████| 203/203 [00:01<00:00, 185.44it/s, loss=4.1993]



Epoch 0:
Average Train Loss: 4.6154
Average Val Loss: 4.3147


Training Epoch 1: 100%|██████████| 811/811 [00:09<00:00, 88.69it/s, loss=4.1831]
Validation Epoch 1: 100%|██████████| 203/203 [00:01<00:00, 172.69it/s, loss=4.5834]



Epoch 1:
Average Train Loss: 4.2875
Average Val Loss: 4.2305


Training Epoch 2: 100%|██████████| 811/811 [00:09<00:00, 86.42it/s, loss=4.9299]
Validation Epoch 2: 100%|██████████| 203/203 [00:01<00:00, 173.44it/s, loss=3.2719]



Epoch 2:
Average Train Loss: 4.1973
Average Val Loss: 4.1679


Training Epoch 3: 100%|██████████| 811/811 [00:09<00:00, 85.77it/s, loss=3.8684]
Validation Epoch 3: 100%|██████████| 203/203 [00:01<00:00, 173.96it/s, loss=4.0730]



Epoch 3:
Average Train Loss: 4.1221
Average Val Loss: 4.0807


Training Epoch 4: 100%|██████████| 811/811 [00:09<00:00, 88.25it/s, loss=3.8990]
Validation Epoch 4: 100%|██████████| 203/203 [00:01<00:00, 183.06it/s, loss=4.0240]



Epoch 4:
Average Train Loss: 4.1027
Average Val Loss: 4.1247


Training Epoch 5: 100%|██████████| 811/811 [00:09<00:00, 89.21it/s, loss=4.2748]
Validation Epoch 5: 100%|██████████| 203/203 [00:01<00:00, 181.84it/s, loss=3.2997]



Epoch 5:
Average Train Loss: 4.0375
Average Val Loss: 4.0326


Training Epoch 6: 100%|██████████| 811/811 [00:09<00:00, 88.46it/s, loss=4.2696]
Validation Epoch 6: 100%|██████████| 203/203 [00:01<00:00, 192.03it/s, loss=3.6971]



Epoch 6:
Average Train Loss: 4.0129
Average Val Loss: 4.0138


Training Epoch 7: 100%|██████████| 811/811 [00:09<00:00, 86.08it/s, loss=3.7715]
Validation Epoch 7: 100%|██████████| 203/203 [00:01<00:00, 179.89it/s, loss=4.2486]



Epoch 7:
Average Train Loss: 3.9902
Average Val Loss: 4.0222


Training Epoch 8: 100%|██████████| 811/811 [00:08<00:00, 90.21it/s, loss=4.8278]
Validation Epoch 8: 100%|██████████| 203/203 [00:01<00:00, 191.82it/s, loss=4.0898]



Epoch 8:
Average Train Loss: 3.9983
Average Val Loss: 3.9843


Training Epoch 9: 100%|██████████| 811/811 [00:08<00:00, 90.61it/s, loss=3.7562]
Validation Epoch 9: 100%|██████████| 203/203 [00:01<00:00, 190.02it/s, loss=3.4666]



Epoch 9:
Average Train Loss: 3.9778
Average Val Loss: 3.9579


Training Epoch 10: 100%|██████████| 811/811 [00:09<00:00, 88.92it/s, loss=3.9166]
Validation Epoch 10: 100%|██████████| 203/203 [00:01<00:00, 177.69it/s, loss=3.1965]



Epoch 10:
Average Train Loss: 3.9735
Average Val Loss: 3.9895


Training Epoch 11: 100%|██████████| 811/811 [00:09<00:00, 85.90it/s, loss=4.3929]
Validation Epoch 11: 100%|██████████| 203/203 [00:01<00:00, 181.00it/s, loss=4.1668]



Epoch 11:
Average Train Loss: 3.9745
Average Val Loss: 4.0064


Training Epoch 12: 100%|██████████| 811/811 [00:09<00:00, 87.94it/s, loss=3.3683]
Validation Epoch 12: 100%|██████████| 203/203 [00:01<00:00, 123.82it/s, loss=4.1838]



Epoch 12:
Average Train Loss: 3.9511
Average Val Loss: 4.0305


Training Epoch 13: 100%|██████████| 811/811 [00:09<00:00, 85.97it/s, loss=4.4225]
Validation Epoch 13: 100%|██████████| 203/203 [00:01<00:00, 122.29it/s, loss=4.6882]



Epoch 13:
Average Train Loss: 3.9464
Average Val Loss: 3.9523


Training Epoch 14: 100%|██████████| 811/811 [00:09<00:00, 81.88it/s, loss=3.9012]
Validation Epoch 14: 100%|██████████| 203/203 [00:01<00:00, 119.31it/s, loss=3.0698]



Epoch 14:
Average Train Loss: 3.9367
Average Val Loss: 3.9260


Training Epoch 15: 100%|██████████| 811/811 [00:09<00:00, 84.40it/s, loss=3.5695]
Validation Epoch 15: 100%|██████████| 203/203 [00:01<00:00, 179.00it/s, loss=4.1211]



Epoch 15:
Average Train Loss: 3.9926
Average Val Loss: 3.9048


Training Epoch 16: 100%|██████████| 811/811 [00:09<00:00, 84.87it/s, loss=3.0857]
Validation Epoch 16: 100%|██████████| 203/203 [00:01<00:00, 186.46it/s, loss=3.9099]



Epoch 16:
Average Train Loss: 3.9630
Average Val Loss: 3.9437


Training Epoch 17: 100%|██████████| 811/811 [00:09<00:00, 83.09it/s, loss=4.4891]
Validation Epoch 17: 100%|██████████| 203/203 [00:01<00:00, 189.63it/s, loss=3.7747]



Epoch 17:
Average Train Loss: 3.9331
Average Val Loss: 3.9731


Training Epoch 18: 100%|██████████| 811/811 [00:09<00:00, 82.06it/s, loss=3.7189]
Validation Epoch 18: 100%|██████████| 203/203 [00:01<00:00, 176.33it/s, loss=4.2200]



Epoch 18:
Average Train Loss: 3.9407
Average Val Loss: 3.9628


Training Epoch 19: 100%|██████████| 811/811 [00:09<00:00, 87.16it/s, loss=4.2909]
Validation Epoch 19: 100%|██████████| 203/203 [00:01<00:00, 181.88it/s, loss=4.5857]



Epoch 19:
Average Train Loss: 3.9668
Average Val Loss: 3.9602


Training Epoch 20: 100%|██████████| 811/811 [00:09<00:00, 86.81it/s, loss=3.1465]
Validation Epoch 20: 100%|██████████| 203/203 [00:01<00:00, 191.79it/s, loss=3.8706]



Epoch 20:
Average Train Loss: 3.9559
Average Val Loss: 3.9827


Training Epoch 21: 100%|██████████| 811/811 [00:09<00:00, 89.50it/s, loss=4.7856]
Validation Epoch 21: 100%|██████████| 203/203 [00:01<00:00, 180.31it/s, loss=3.1435]



Epoch 21:
Average Train Loss: 3.9446
Average Val Loss: 3.9350


Training Epoch 22: 100%|██████████| 811/811 [00:09<00:00, 86.78it/s, loss=3.0904]
Validation Epoch 22: 100%|██████████| 203/203 [00:01<00:00, 183.71it/s, loss=3.7620]



Epoch 22:
Average Train Loss: 3.9438
Average Val Loss: 3.9112


Training Epoch 23: 100%|██████████| 811/811 [00:09<00:00, 89.80it/s, loss=4.5324]
Validation Epoch 23: 100%|██████████| 203/203 [00:01<00:00, 160.55it/s, loss=3.6430]



Epoch 23:
Average Train Loss: 3.9181
Average Val Loss: 3.9085


Training Epoch 24: 100%|██████████| 811/811 [00:09<00:00, 85.92it/s, loss=4.2095]
Validation Epoch 24: 100%|██████████| 203/203 [00:01<00:00, 185.23it/s, loss=3.0022]



Epoch 24:
Average Train Loss: 3.9233
Average Val Loss: 3.9114


Training Epoch 25: 100%|██████████| 811/811 [00:09<00:00, 85.08it/s, loss=3.8977]
Validation Epoch 25: 100%|██████████| 203/203 [00:01<00:00, 186.99it/s, loss=3.8404]



Epoch 25:
Average Train Loss: 3.9043
Average Val Loss: 3.9404


Training Epoch 26: 100%|██████████| 811/811 [00:09<00:00, 84.70it/s, loss=4.4660]
Validation Epoch 26: 100%|██████████| 203/203 [00:01<00:00, 167.74it/s, loss=4.1456]



Epoch 26:
Average Train Loss: 3.9159
Average Val Loss: 3.8403


Training Epoch 27: 100%|██████████| 811/811 [00:09<00:00, 86.34it/s, loss=3.6490]
Validation Epoch 27: 100%|██████████| 203/203 [00:01<00:00, 188.03it/s, loss=3.7650]



Epoch 27:
Average Train Loss: 3.8775
Average Val Loss: 3.9122


Training Epoch 28: 100%|██████████| 811/811 [00:09<00:00, 89.10it/s, loss=4.1568]
Validation Epoch 28: 100%|██████████| 203/203 [00:01<00:00, 172.26it/s, loss=4.7893]



Epoch 28:
Average Train Loss: 3.9031
Average Val Loss: 3.9474


Training Epoch 29: 100%|██████████| 811/811 [00:08<00:00, 91.20it/s, loss=4.3331]
Validation Epoch 29: 100%|██████████| 203/203 [00:01<00:00, 171.44it/s, loss=3.0869]



Epoch 29:
Average Train Loss: 3.9337
Average Val Loss: 3.8983


Training Epoch 30: 100%|██████████| 811/811 [00:09<00:00, 83.74it/s, loss=5.2862]
Validation Epoch 30: 100%|██████████| 203/203 [00:01<00:00, 179.86it/s, loss=3.4465]



Epoch 30:
Average Train Loss: 3.9228
Average Val Loss: 3.8222


Training Epoch 31: 100%|██████████| 811/811 [00:09<00:00, 83.64it/s, loss=2.5880]
Validation Epoch 31: 100%|██████████| 203/203 [00:01<00:00, 170.94it/s, loss=4.1522]



Epoch 31:
Average Train Loss: 3.9149
Average Val Loss: 3.9060


Training Epoch 32: 100%|██████████| 811/811 [00:09<00:00, 88.79it/s, loss=2.2682]
Validation Epoch 32: 100%|██████████| 203/203 [00:01<00:00, 181.57it/s, loss=3.1843]



Epoch 32:
Average Train Loss: 3.9357
Average Val Loss: 3.9633


Training Epoch 33: 100%|██████████| 811/811 [00:09<00:00, 88.33it/s, loss=4.9035]
Validation Epoch 33: 100%|██████████| 203/203 [00:01<00:00, 169.68it/s, loss=3.3272]



Epoch 33:
Average Train Loss: 3.9107
Average Val Loss: 3.9466


Training Epoch 34: 100%|██████████| 811/811 [00:09<00:00, 85.38it/s, loss=3.7570]
Validation Epoch 34: 100%|██████████| 203/203 [00:01<00:00, 133.09it/s, loss=3.1742]



Epoch 34:
Average Train Loss: 3.8892
Average Val Loss: 3.8703


Training Epoch 35: 100%|██████████| 811/811 [00:09<00:00, 89.77it/s, loss=4.9201]
Validation Epoch 35: 100%|██████████| 203/203 [00:01<00:00, 182.61it/s, loss=4.1713]



Epoch 35:
Average Train Loss: 3.9421
Average Val Loss: 3.9370


Training Epoch 36: 100%|██████████| 811/811 [00:08<00:00, 90.65it/s, loss=4.2823]
Validation Epoch 36: 100%|██████████| 203/203 [00:01<00:00, 177.96it/s, loss=4.4103]



Epoch 36:
Average Train Loss: 3.9075
Average Val Loss: 3.8556


Training Epoch 37: 100%|██████████| 811/811 [00:09<00:00, 88.02it/s, loss=2.3677]
Validation Epoch 37: 100%|██████████| 203/203 [00:01<00:00, 191.56it/s, loss=4.4010]



Epoch 37:
Average Train Loss: 3.8935
Average Val Loss: 3.8991


Training Epoch 38: 100%|██████████| 811/811 [00:09<00:00, 82.99it/s, loss=3.9348]
Validation Epoch 38: 100%|██████████| 203/203 [00:01<00:00, 173.74it/s, loss=3.1089]



Epoch 38:
Average Train Loss: 3.9419
Average Val Loss: 3.8774


Training Epoch 39: 100%|██████████| 811/811 [00:09<00:00, 88.63it/s, loss=3.0058]
Validation Epoch 39: 100%|██████████| 203/203 [00:01<00:00, 183.10it/s, loss=3.3268]



Epoch 39:
Average Train Loss: 3.8836
Average Val Loss: 3.9366


Training Epoch 40: 100%|██████████| 811/811 [00:09<00:00, 86.83it/s, loss=4.3017]
Validation Epoch 40: 100%|██████████| 203/203 [00:01<00:00, 191.00it/s, loss=4.7268]



Epoch 40:
Average Train Loss: 3.9051
Average Val Loss: 3.9255


Training Epoch 41: 100%|██████████| 811/811 [00:09<00:00, 87.79it/s, loss=4.7763]
Validation Epoch 41: 100%|██████████| 203/203 [00:01<00:00, 192.15it/s, loss=4.6554]



Epoch 41:
Average Train Loss: 3.8883
Average Val Loss: 3.9384


Training Epoch 42: 100%|██████████| 811/811 [00:09<00:00, 89.16it/s, loss=4.2320]
Validation Epoch 42: 100%|██████████| 203/203 [00:01<00:00, 185.66it/s, loss=4.5630]



Epoch 42:
Average Train Loss: 3.9205
Average Val Loss: 3.9259


Training Epoch 43: 100%|██████████| 811/811 [00:09<00:00, 87.55it/s, loss=3.6428]
Validation Epoch 43: 100%|██████████| 203/203 [00:01<00:00, 180.60it/s, loss=3.8957]



Epoch 43:
Average Train Loss: 3.9033
Average Val Loss: 3.8899


Training Epoch 44: 100%|██████████| 811/811 [00:09<00:00, 86.42it/s, loss=4.3599]
Validation Epoch 44: 100%|██████████| 203/203 [00:01<00:00, 171.94it/s, loss=4.7393]



Epoch 44:
Average Train Loss: 3.9246
Average Val Loss: 3.9260


Training Epoch 45: 100%|██████████| 811/811 [00:09<00:00, 87.06it/s, loss=2.6593]
Validation Epoch 45: 100%|██████████| 203/203 [00:01<00:00, 174.33it/s, loss=4.5525]



Epoch 45:
Average Train Loss: 3.8944
Average Val Loss: 3.9356


Training Epoch 46: 100%|██████████| 811/811 [00:08<00:00, 92.22it/s, loss=3.8107]
Validation Epoch 46: 100%|██████████| 203/203 [00:01<00:00, 179.42it/s, loss=4.5150]



Epoch 46:
Average Train Loss: 3.8988
Average Val Loss: 3.8337


Training Epoch 47: 100%|██████████| 811/811 [00:09<00:00, 84.96it/s, loss=4.2676]
Validation Epoch 47: 100%|██████████| 203/203 [00:01<00:00, 183.82it/s, loss=3.9347]



Epoch 47:
Average Train Loss: 3.9036
Average Val Loss: 3.9345


Training Epoch 48: 100%|██████████| 811/811 [00:09<00:00, 86.50it/s, loss=3.1866]
Validation Epoch 48: 100%|██████████| 203/203 [00:01<00:00, 182.17it/s, loss=4.3543]



Epoch 48:
Average Train Loss: 3.9005
Average Val Loss: 3.9186


Training Epoch 49: 100%|██████████| 811/811 [00:09<00:00, 84.04it/s, loss=3.5579]
Validation Epoch 49: 100%|██████████| 203/203 [00:01<00:00, 175.76it/s, loss=3.9203]



Epoch 49:
Average Train Loss: 3.9186
Average Val Loss: 3.8922


Training Epoch 50: 100%|██████████| 811/811 [00:08<00:00, 91.07it/s, loss=2.5710] 
Validation Epoch 50: 100%|██████████| 203/203 [00:01<00:00, 167.53it/s, loss=4.0429]



Epoch 50:
Average Train Loss: 3.8932
Average Val Loss: 3.9163


Training Epoch 51: 100%|██████████| 811/811 [00:09<00:00, 85.46it/s, loss=4.0356]
Validation Epoch 51: 100%|██████████| 203/203 [00:01<00:00, 182.16it/s, loss=2.5163]



Epoch 51:
Average Train Loss: 3.9010
Average Val Loss: 3.8964


Training Epoch 52: 100%|██████████| 811/811 [00:09<00:00, 89.19it/s, loss=3.9170]
Validation Epoch 52: 100%|██████████| 203/203 [00:01<00:00, 192.74it/s, loss=3.5739]



Epoch 52:
Average Train Loss: 3.8919
Average Val Loss: 3.9239


Training Epoch 53: 100%|██████████| 811/811 [00:09<00:00, 84.43it/s, loss=2.9033]
Validation Epoch 53: 100%|██████████| 203/203 [00:01<00:00, 182.80it/s, loss=4.0419]



Epoch 53:
Average Train Loss: 3.9194
Average Val Loss: 3.8161


Training Epoch 54: 100%|██████████| 811/811 [00:09<00:00, 88.10it/s, loss=4.0546]
Validation Epoch 54: 100%|██████████| 203/203 [00:01<00:00, 176.14it/s, loss=4.3462]



Epoch 54:
Average Train Loss: 3.8943
Average Val Loss: 3.9306


Training Epoch 55: 100%|██████████| 811/811 [00:09<00:00, 86.73it/s, loss=4.2266]
Validation Epoch 55: 100%|██████████| 203/203 [00:01<00:00, 178.92it/s, loss=4.2701]



Epoch 55:
Average Train Loss: 3.8916
Average Val Loss: 3.8540


Training Epoch 56: 100%|██████████| 811/811 [00:09<00:00, 84.46it/s, loss=3.2312]
Validation Epoch 56: 100%|██████████| 203/203 [00:01<00:00, 186.46it/s, loss=4.5985]



Epoch 56:
Average Train Loss: 3.9040
Average Val Loss: 3.9047


Training Epoch 57: 100%|██████████| 811/811 [00:09<00:00, 85.32it/s, loss=3.6109]
Validation Epoch 57: 100%|██████████| 203/203 [00:01<00:00, 179.47it/s, loss=3.8103]



Epoch 57:
Average Train Loss: 3.8833
Average Val Loss: 3.8601


Training Epoch 58: 100%|██████████| 811/811 [00:09<00:00, 88.69it/s, loss=3.5467]
Validation Epoch 58: 100%|██████████| 203/203 [00:01<00:00, 178.15it/s, loss=3.3173]



Epoch 58:
Average Train Loss: 3.8904
Average Val Loss: 3.9098


Training Epoch 59: 100%|██████████| 811/811 [00:09<00:00, 86.12it/s, loss=4.2245]
Validation Epoch 59: 100%|██████████| 203/203 [00:01<00:00, 191.63it/s, loss=4.5439]



Epoch 59:
Average Train Loss: 3.8841
Average Val Loss: 3.9047


Training Epoch 60: 100%|██████████| 811/811 [00:09<00:00, 84.12it/s, loss=4.6928]
Validation Epoch 60: 100%|██████████| 203/203 [00:01<00:00, 177.57it/s, loss=3.4514]



Epoch 60:
Average Train Loss: 3.9061
Average Val Loss: 3.9477


Training Epoch 61: 100%|██████████| 811/811 [00:09<00:00, 89.82it/s, loss=4.4948]
Validation Epoch 61: 100%|██████████| 203/203 [00:01<00:00, 188.80it/s, loss=4.2227]



Epoch 61:
Average Train Loss: 3.8692
Average Val Loss: 3.9354


Training Epoch 62: 100%|██████████| 811/811 [00:08<00:00, 90.63it/s, loss=3.6872]
Validation Epoch 62: 100%|██████████| 203/203 [00:01<00:00, 190.52it/s, loss=4.3225]



Epoch 62:
Average Train Loss: 3.8892
Average Val Loss: 3.8517


Training Epoch 63: 100%|██████████| 811/811 [00:09<00:00, 88.81it/s, loss=2.8877]
Validation Epoch 63: 100%|██████████| 203/203 [00:01<00:00, 189.00it/s, loss=2.5867]



Epoch 63:
Average Train Loss: 3.9208
Average Val Loss: 3.9241


Training Epoch 64: 100%|██████████| 811/811 [00:09<00:00, 88.44it/s, loss=3.6943]
Validation Epoch 64: 100%|██████████| 203/203 [00:01<00:00, 185.03it/s, loss=4.1638]



Epoch 64:
Average Train Loss: 3.8746
Average Val Loss: 3.8866


Training Epoch 65: 100%|██████████| 811/811 [00:09<00:00, 89.28it/s, loss=3.4134]
Validation Epoch 65: 100%|██████████| 203/203 [00:01<00:00, 181.47it/s, loss=4.4142]



Epoch 65:
Average Train Loss: 3.8591
Average Val Loss: 3.9171


Training Epoch 66: 100%|██████████| 811/811 [00:09<00:00, 86.58it/s, loss=3.9758]
Validation Epoch 66: 100%|██████████| 203/203 [00:01<00:00, 181.63it/s, loss=3.8197]



Epoch 66:
Average Train Loss: 3.8897
Average Val Loss: 3.8798


Training Epoch 67: 100%|██████████| 811/811 [00:09<00:00, 85.19it/s, loss=3.9192]
Validation Epoch 67: 100%|██████████| 203/203 [00:01<00:00, 171.75it/s, loss=3.4297]



Epoch 67:
Average Train Loss: 3.9055
Average Val Loss: 3.9401


Training Epoch 68: 100%|██████████| 811/811 [00:09<00:00, 84.79it/s, loss=4.5533]
Validation Epoch 68: 100%|██████████| 203/203 [00:01<00:00, 185.21it/s, loss=4.8121]



Epoch 68:
Average Train Loss: 3.8865
Average Val Loss: 3.8965


Training Epoch 69: 100%|██████████| 811/811 [00:09<00:00, 86.28it/s, loss=4.5256]
Validation Epoch 69: 100%|██████████| 203/203 [00:01<00:00, 162.00it/s, loss=3.5583]



Epoch 69:
Average Train Loss: 3.8926
Average Val Loss: 3.9233


Training Epoch 70: 100%|██████████| 811/811 [00:09<00:00, 86.25it/s, loss=3.4832]
Validation Epoch 70: 100%|██████████| 203/203 [00:01<00:00, 179.36it/s, loss=4.0845]



Epoch 70:
Average Train Loss: 3.8835
Average Val Loss: 3.8879


Training Epoch 71: 100%|██████████| 811/811 [00:09<00:00, 84.83it/s, loss=3.3919]
Validation Epoch 71: 100%|██████████| 203/203 [00:01<00:00, 177.41it/s, loss=4.0268]



Epoch 71:
Average Train Loss: 3.9137
Average Val Loss: 3.9097


Training Epoch 72: 100%|██████████| 811/811 [00:09<00:00, 84.44it/s, loss=4.1288]
Validation Epoch 72: 100%|██████████| 203/203 [00:01<00:00, 181.80it/s, loss=3.6985]



Epoch 72:
Average Train Loss: 3.8618
Average Val Loss: 3.9493


Training Epoch 73: 100%|██████████| 811/811 [00:09<00:00, 84.70it/s, loss=4.4484]
Validation Epoch 73: 100%|██████████| 203/203 [00:01<00:00, 166.65it/s, loss=2.9787]



Epoch 73:
Average Train Loss: 3.8960
Average Val Loss: 3.9289


Training Epoch 74: 100%|██████████| 811/811 [00:09<00:00, 83.20it/s, loss=4.3215]
Validation Epoch 74: 100%|██████████| 203/203 [00:01<00:00, 170.99it/s, loss=4.0098]



Epoch 74:
Average Train Loss: 3.9057
Average Val Loss: 3.9012


Training Epoch 75: 100%|██████████| 811/811 [00:09<00:00, 86.40it/s, loss=3.0002]
Validation Epoch 75: 100%|██████████| 203/203 [00:01<00:00, 178.86it/s, loss=3.2082]



Epoch 75:
Average Train Loss: 3.8913
Average Val Loss: 3.9021


Training Epoch 76: 100%|██████████| 811/811 [00:09<00:00, 85.73it/s, loss=2.2476]
Validation Epoch 76: 100%|██████████| 203/203 [00:01<00:00, 191.51it/s, loss=3.2200]



Epoch 76:
Average Train Loss: 3.8965
Average Val Loss: 3.8520


Training Epoch 77: 100%|██████████| 811/811 [00:09<00:00, 83.41it/s, loss=4.1010]
Validation Epoch 77: 100%|██████████| 203/203 [00:01<00:00, 164.11it/s, loss=3.3872]



Epoch 77:
Average Train Loss: 3.8663
Average Val Loss: 3.8604


Training Epoch 78: 100%|██████████| 811/811 [00:10<00:00, 79.89it/s, loss=3.7008]
Validation Epoch 78: 100%|██████████| 203/203 [00:01<00:00, 165.31it/s, loss=4.1191]



Epoch 78:
Average Train Loss: 3.9275
Average Val Loss: 3.8644


Training Epoch 79: 100%|██████████| 811/811 [00:09<00:00, 86.24it/s, loss=3.7859]
Validation Epoch 79: 100%|██████████| 203/203 [00:01<00:00, 181.52it/s, loss=3.5963]



Epoch 79:
Average Train Loss: 3.8638
Average Val Loss: 3.8679


Training Epoch 80: 100%|██████████| 811/811 [00:09<00:00, 83.40it/s, loss=4.1959]
Validation Epoch 80: 100%|██████████| 203/203 [00:01<00:00, 171.58it/s, loss=4.7762]



Epoch 80:
Average Train Loss: 3.8892
Average Val Loss: 3.8572


Training Epoch 81: 100%|██████████| 811/811 [00:09<00:00, 88.64it/s, loss=3.8666]
Validation Epoch 81: 100%|██████████| 203/203 [00:01<00:00, 181.45it/s, loss=3.7513]



Epoch 81:
Average Train Loss: 3.8655
Average Val Loss: 3.9115


Training Epoch 82: 100%|██████████| 811/811 [00:09<00:00, 83.53it/s, loss=4.2613]
Validation Epoch 82: 100%|██████████| 203/203 [00:01<00:00, 165.11it/s, loss=4.1944]



Epoch 82:
Average Train Loss: 3.8973
Average Val Loss: 3.9062


Training Epoch 83: 100%|██████████| 811/811 [00:09<00:00, 82.59it/s, loss=4.5656]
Validation Epoch 83: 100%|██████████| 203/203 [00:01<00:00, 173.86it/s, loss=3.4200]



Epoch 83:
Average Train Loss: 3.8586
Average Val Loss: 3.9370


Training Epoch 84: 100%|██████████| 811/811 [00:09<00:00, 89.17it/s, loss=4.4991]
Validation Epoch 84: 100%|██████████| 203/203 [00:01<00:00, 177.22it/s, loss=4.5344]



Epoch 84:
Average Train Loss: 3.9232
Average Val Loss: 3.8638


Training Epoch 85: 100%|██████████| 811/811 [00:10<00:00, 80.86it/s, loss=3.4020]
Validation Epoch 85: 100%|██████████| 203/203 [00:01<00:00, 160.58it/s, loss=3.8478]



Epoch 85:
Average Train Loss: 3.9231
Average Val Loss: 3.8508


Training Epoch 86: 100%|██████████| 811/811 [00:09<00:00, 82.17it/s, loss=3.1828]
Validation Epoch 86: 100%|██████████| 203/203 [00:01<00:00, 167.91it/s, loss=3.9768]



Epoch 86:
Average Train Loss: 3.9271
Average Val Loss: 3.8546


Training Epoch 87: 100%|██████████| 811/811 [00:09<00:00, 84.32it/s, loss=4.6608]
Validation Epoch 87: 100%|██████████| 203/203 [00:01<00:00, 170.74it/s, loss=4.5427]



Epoch 87:
Average Train Loss: 3.8945
Average Val Loss: 3.9002


Training Epoch 88: 100%|██████████| 811/811 [00:09<00:00, 87.50it/s, loss=3.6510]
Validation Epoch 88: 100%|██████████| 203/203 [00:01<00:00, 182.96it/s, loss=3.9340]



Epoch 88:
Average Train Loss: 3.9002
Average Val Loss: 3.9090


Training Epoch 89: 100%|██████████| 811/811 [00:09<00:00, 85.73it/s, loss=2.5100]
Validation Epoch 89: 100%|██████████| 203/203 [00:01<00:00, 173.15it/s, loss=4.3047]



Epoch 89:
Average Train Loss: 3.8830
Average Val Loss: 3.8768


Training Epoch 90: 100%|██████████| 811/811 [00:09<00:00, 89.25it/s, loss=4.2248]
Validation Epoch 90: 100%|██████████| 203/203 [00:01<00:00, 185.13it/s, loss=4.5674]



Epoch 90:
Average Train Loss: 3.9052
Average Val Loss: 3.8936


Training Epoch 91: 100%|██████████| 811/811 [00:09<00:00, 83.26it/s, loss=3.7549]
Validation Epoch 91: 100%|██████████| 203/203 [00:01<00:00, 182.79it/s, loss=4.5890]



Epoch 91:
Average Train Loss: 3.8934
Average Val Loss: 3.8326


Training Epoch 92: 100%|██████████| 811/811 [00:09<00:00, 85.27it/s, loss=3.7391]
Validation Epoch 92: 100%|██████████| 203/203 [00:01<00:00, 166.60it/s, loss=4.4114]



Epoch 92:
Average Train Loss: 3.8562
Average Val Loss: 3.9349


Training Epoch 93: 100%|██████████| 811/811 [00:09<00:00, 84.85it/s, loss=2.9209]
Validation Epoch 93: 100%|██████████| 203/203 [00:01<00:00, 172.11it/s, loss=3.9502]



Epoch 93:
Average Train Loss: 3.8709
Average Val Loss: 3.9140


Training Epoch 94: 100%|██████████| 811/811 [00:09<00:00, 82.58it/s, loss=3.0367]
Validation Epoch 94: 100%|██████████| 203/203 [00:01<00:00, 173.35it/s, loss=4.2129]



Epoch 94:
Average Train Loss: 3.8908
Average Val Loss: 3.9323


Training Epoch 95: 100%|██████████| 811/811 [00:09<00:00, 83.32it/s, loss=4.1359]
Validation Epoch 95: 100%|██████████| 203/203 [00:01<00:00, 166.14it/s, loss=4.6475]



Epoch 95:
Average Train Loss: 3.8971
Average Val Loss: 3.8847


Training Epoch 96: 100%|██████████| 811/811 [00:09<00:00, 84.50it/s, loss=3.7654]
Validation Epoch 96: 100%|██████████| 203/203 [00:01<00:00, 170.74it/s, loss=3.8270]



Epoch 96:
Average Train Loss: 3.8796
Average Val Loss: 3.9292


Training Epoch 97: 100%|██████████| 811/811 [00:09<00:00, 84.89it/s, loss=3.3250]
Validation Epoch 97: 100%|██████████| 203/203 [00:01<00:00, 176.67it/s, loss=4.3858]



Epoch 97:
Average Train Loss: 3.9282
Average Val Loss: 3.9076


Training Epoch 98: 100%|██████████| 811/811 [00:09<00:00, 88.35it/s, loss=3.7346]
Validation Epoch 98: 100%|██████████| 203/203 [00:01<00:00, 179.36it/s, loss=3.0233]



Epoch 98:
Average Train Loss: 3.8701
Average Val Loss: 3.9153


Training Epoch 99: 100%|██████████| 811/811 [00:09<00:00, 86.97it/s, loss=4.3688]
Validation Epoch 99: 100%|██████████| 203/203 [00:01<00:00, 165.42it/s, loss=4.4931]



Epoch 99:
Average Train Loss: 3.8999
Average Val Loss: 3.8173


Training Epoch 100: 100%|██████████| 811/811 [00:09<00:00, 86.96it/s, loss=4.0811]
Validation Epoch 100: 100%|██████████| 203/203 [00:01<00:00, 159.23it/s, loss=3.7978]



Epoch 100:
Average Train Loss: 3.9149
Average Val Loss: 3.9207


Training Epoch 101: 100%|██████████| 811/811 [00:09<00:00, 85.95it/s, loss=4.2394]
Validation Epoch 101: 100%|██████████| 203/203 [00:01<00:00, 175.39it/s, loss=4.8441]



Epoch 101:
Average Train Loss: 3.8787
Average Val Loss: 3.8660


Training Epoch 102: 100%|██████████| 811/811 [00:09<00:00, 86.05it/s, loss=3.7818]
Validation Epoch 102: 100%|██████████| 203/203 [00:01<00:00, 184.79it/s, loss=2.5103]



Epoch 102:
Average Train Loss: 3.9210
Average Val Loss: 3.9023


Training Epoch 103: 100%|██████████| 811/811 [00:09<00:00, 85.79it/s, loss=4.1247]
Validation Epoch 103: 100%|██████████| 203/203 [00:01<00:00, 177.17it/s, loss=4.4548]



Epoch 103:
Average Train Loss: 3.9028
Average Val Loss: 3.9149


Training Epoch 104: 100%|██████████| 811/811 [00:08<00:00, 90.64it/s, loss=4.2043]
Validation Epoch 104: 100%|██████████| 203/203 [00:01<00:00, 174.46it/s, loss=3.5419]



Epoch 104:
Average Train Loss: 3.8896
Average Val Loss: 3.8218


Training Epoch 105: 100%|██████████| 811/811 [00:09<00:00, 85.03it/s, loss=3.9434]
Validation Epoch 105: 100%|██████████| 203/203 [00:01<00:00, 171.61it/s, loss=4.5806]



Epoch 105:
Average Train Loss: 3.8531
Average Val Loss: 3.8456


Training Epoch 106: 100%|██████████| 811/811 [00:09<00:00, 87.38it/s, loss=3.6050]
Validation Epoch 106: 100%|██████████| 203/203 [00:01<00:00, 182.33it/s, loss=4.4872]



Epoch 106:
Average Train Loss: 3.8623
Average Val Loss: 3.9041


Training Epoch 107: 100%|██████████| 811/811 [00:09<00:00, 82.42it/s, loss=4.5887]
Validation Epoch 107: 100%|██████████| 203/203 [00:01<00:00, 177.87it/s, loss=4.5606]



Epoch 107:
Average Train Loss: 3.8954
Average Val Loss: 3.8216


Training Epoch 108: 100%|██████████| 811/811 [00:09<00:00, 82.31it/s, loss=3.8788]
Validation Epoch 108: 100%|██████████| 203/203 [00:01<00:00, 165.54it/s, loss=3.7677]



Epoch 108:
Average Train Loss: 3.8894
Average Val Loss: 3.9366


Training Epoch 109: 100%|██████████| 811/811 [00:09<00:00, 81.34it/s, loss=4.7006]
Validation Epoch 109: 100%|██████████| 203/203 [00:01<00:00, 173.85it/s, loss=3.2328]



Epoch 109:
Average Train Loss: 3.8753
Average Val Loss: 3.8517


Training Epoch 110: 100%|██████████| 811/811 [00:09<00:00, 86.09it/s, loss=3.0678]
Validation Epoch 110: 100%|██████████| 203/203 [00:01<00:00, 194.47it/s, loss=3.9207]



Epoch 110:
Average Train Loss: 3.9079
Average Val Loss: 3.9327


Training Epoch 111: 100%|██████████| 811/811 [00:09<00:00, 83.62it/s, loss=4.8120]
Validation Epoch 111: 100%|██████████| 203/203 [00:01<00:00, 175.01it/s, loss=4.4415]



Epoch 111:
Average Train Loss: 3.8734
Average Val Loss: 3.8844


Training Epoch 112: 100%|██████████| 811/811 [00:09<00:00, 82.76it/s, loss=4.2076]
Validation Epoch 112: 100%|██████████| 203/203 [00:01<00:00, 166.26it/s, loss=3.7629]



Epoch 112:
Average Train Loss: 3.9113
Average Val Loss: 3.8725


Training Epoch 113: 100%|██████████| 811/811 [00:09<00:00, 82.51it/s, loss=3.5451]
Validation Epoch 113: 100%|██████████| 203/203 [00:01<00:00, 168.89it/s, loss=4.1247]



Epoch 113:
Average Train Loss: 3.8620
Average Val Loss: 3.8733


Training Epoch 114: 100%|██████████| 811/811 [00:09<00:00, 83.58it/s, loss=3.0635]
Validation Epoch 114: 100%|██████████| 203/203 [00:01<00:00, 179.87it/s, loss=4.0921]



Epoch 114:
Average Train Loss: 3.8941
Average Val Loss: 3.8470


Training Epoch 115: 100%|██████████| 811/811 [00:09<00:00, 85.27it/s, loss=4.2663]
Validation Epoch 115: 100%|██████████| 203/203 [00:01<00:00, 171.29it/s, loss=4.2101]



Epoch 115:
Average Train Loss: 3.8948
Average Val Loss: 3.9038


Training Epoch 116: 100%|██████████| 811/811 [00:09<00:00, 85.21it/s, loss=3.8827]
Validation Epoch 116: 100%|██████████| 203/203 [00:01<00:00, 168.37it/s, loss=4.6774]



Epoch 116:
Average Train Loss: 3.9102
Average Val Loss: 3.8893


Training Epoch 117: 100%|██████████| 811/811 [00:09<00:00, 87.66it/s, loss=3.5538]
Validation Epoch 117: 100%|██████████| 203/203 [00:01<00:00, 156.91it/s, loss=3.9665]



Epoch 117:
Average Train Loss: 3.9308
Average Val Loss: 3.8886


Training Epoch 118: 100%|██████████| 811/811 [00:09<00:00, 84.72it/s, loss=2.9741]
Validation Epoch 118: 100%|██████████| 203/203 [00:01<00:00, 174.76it/s, loss=4.0026]



Epoch 118:
Average Train Loss: 3.8902
Average Val Loss: 3.8454


Training Epoch 119: 100%|██████████| 811/811 [00:09<00:00, 86.12it/s, loss=3.4289]
Validation Epoch 119: 100%|██████████| 203/203 [00:01<00:00, 163.11it/s, loss=4.3281]



Epoch 119:
Average Train Loss: 3.9015
Average Val Loss: 3.8753


Training Epoch 120: 100%|██████████| 811/811 [00:09<00:00, 84.24it/s, loss=4.2834]
Validation Epoch 120: 100%|██████████| 203/203 [00:01<00:00, 186.28it/s, loss=2.9428]



Epoch 120:
Average Train Loss: 3.8829
Average Val Loss: 3.8512


Training Epoch 121: 100%|██████████| 811/811 [00:09<00:00, 85.15it/s, loss=4.1916]
Validation Epoch 121: 100%|██████████| 203/203 [00:01<00:00, 170.10it/s, loss=4.3661]



Epoch 121:
Average Train Loss: 3.8935
Average Val Loss: 3.9162


Training Epoch 122: 100%|██████████| 811/811 [00:09<00:00, 82.85it/s, loss=3.3719]
Validation Epoch 122: 100%|██████████| 203/203 [00:01<00:00, 185.78it/s, loss=3.6027]



Epoch 122:
Average Train Loss: 3.9097
Average Val Loss: 3.9419


Training Epoch 123: 100%|██████████| 811/811 [00:09<00:00, 88.47it/s, loss=4.4149]
Validation Epoch 123: 100%|██████████| 203/203 [00:01<00:00, 182.69it/s, loss=3.7494]



Epoch 123:
Average Train Loss: 3.8769
Average Val Loss: 3.7832


Training Epoch 124: 100%|██████████| 811/811 [00:09<00:00, 85.52it/s, loss=3.4949]
Validation Epoch 124: 100%|██████████| 203/203 [00:01<00:00, 177.27it/s, loss=3.2082]



Epoch 124:
Average Train Loss: 3.9140
Average Val Loss: 3.8122


Training Epoch 125: 100%|██████████| 811/811 [00:09<00:00, 88.10it/s, loss=4.1082]
Validation Epoch 125: 100%|██████████| 203/203 [00:01<00:00, 191.14it/s, loss=3.2707]



Epoch 125:
Average Train Loss: 3.9011
Average Val Loss: 3.9148


Training Epoch 126: 100%|██████████| 811/811 [00:09<00:00, 87.45it/s, loss=4.4757]
Validation Epoch 126: 100%|██████████| 203/203 [00:01<00:00, 189.51it/s, loss=4.0237]



Epoch 126:
Average Train Loss: 3.8922
Average Val Loss: 3.8769


Training Epoch 127: 100%|██████████| 811/811 [00:08<00:00, 91.88it/s, loss=3.6050]
Validation Epoch 127: 100%|██████████| 203/203 [00:01<00:00, 193.48it/s, loss=4.5796]



Epoch 127:
Average Train Loss: 3.8805
Average Val Loss: 3.9332


Training Epoch 128: 100%|██████████| 811/811 [00:08<00:00, 91.27it/s, loss=4.3615]
Validation Epoch 128: 100%|██████████| 203/203 [00:01<00:00, 185.97it/s, loss=3.7270]



Epoch 128:
Average Train Loss: 3.8474
Average Val Loss: 3.8941


Training Epoch 129: 100%|██████████| 811/811 [00:08<00:00, 94.48it/s, loss=4.0510]
Validation Epoch 129: 100%|██████████| 203/203 [00:01<00:00, 191.04it/s, loss=4.2567]



Epoch 129:
Average Train Loss: 3.8698
Average Val Loss: 3.8920


Training Epoch 130: 100%|██████████| 811/811 [00:10<00:00, 75.57it/s, loss=3.6325]
Validation Epoch 130: 100%|██████████| 203/203 [00:01<00:00, 196.07it/s, loss=3.9825]



Epoch 130:
Average Train Loss: 3.8956
Average Val Loss: 3.8497


Training Epoch 131: 100%|██████████| 811/811 [00:08<00:00, 92.98it/s, loss=4.2394]
Validation Epoch 131: 100%|██████████| 203/203 [00:01<00:00, 198.54it/s, loss=3.5378]



Epoch 131:
Average Train Loss: 3.8757
Average Val Loss: 3.9284


Training Epoch 132: 100%|██████████| 811/811 [00:08<00:00, 96.24it/s, loss=3.6065]
Validation Epoch 132: 100%|██████████| 203/203 [00:00<00:00, 203.80it/s, loss=3.8159]



Epoch 132:
Average Train Loss: 3.8584
Average Val Loss: 3.8562


Training Epoch 133: 100%|██████████| 811/811 [00:08<00:00, 92.99it/s, loss=4.3656] 
Validation Epoch 133: 100%|██████████| 203/203 [00:01<00:00, 172.42it/s, loss=4.2338]



Epoch 133:
Average Train Loss: 3.9038
Average Val Loss: 3.8918


Training Epoch 134: 100%|██████████| 811/811 [00:09<00:00, 87.29it/s, loss=4.5881]
Validation Epoch 134: 100%|██████████| 203/203 [00:01<00:00, 185.31it/s, loss=4.0936]



Epoch 134:
Average Train Loss: 3.9074
Average Val Loss: 3.8973


Training Epoch 135: 100%|██████████| 811/811 [00:09<00:00, 86.02it/s, loss=4.5291]
Validation Epoch 135: 100%|██████████| 203/203 [00:01<00:00, 190.90it/s, loss=3.1138]



Epoch 135:
Average Train Loss: 3.9070
Average Val Loss: 3.8533


Training Epoch 136: 100%|██████████| 811/811 [00:09<00:00, 89.71it/s, loss=4.8069]
Validation Epoch 136: 100%|██████████| 203/203 [00:01<00:00, 181.56it/s, loss=3.4907]



Epoch 136:
Average Train Loss: 3.8844
Average Val Loss: 3.8822


Training Epoch 137: 100%|██████████| 811/811 [00:09<00:00, 86.05it/s, loss=4.0082]
Validation Epoch 137: 100%|██████████| 203/203 [00:01<00:00, 182.05it/s, loss=3.9025]



Epoch 137:
Average Train Loss: 3.8728
Average Val Loss: 3.8519


Training Epoch 138: 100%|██████████| 811/811 [00:09<00:00, 84.47it/s, loss=4.5192]
Validation Epoch 138: 100%|██████████| 203/203 [00:01<00:00, 179.10it/s, loss=3.3086]



Epoch 138:
Average Train Loss: 3.9073
Average Val Loss: 3.8166


Training Epoch 139: 100%|██████████| 811/811 [00:09<00:00, 83.50it/s, loss=4.5505]
Validation Epoch 139: 100%|██████████| 203/203 [00:01<00:00, 176.27it/s, loss=4.1006]



Epoch 139:
Average Train Loss: 3.9078
Average Val Loss: 3.7933


Training Epoch 140: 100%|██████████| 811/811 [00:09<00:00, 86.52it/s, loss=4.2055]
Validation Epoch 140: 100%|██████████| 203/203 [00:01<00:00, 187.11it/s, loss=3.3641]



Epoch 140:
Average Train Loss: 3.8765
Average Val Loss: 3.8564


Training Epoch 141: 100%|██████████| 811/811 [00:09<00:00, 90.00it/s, loss=3.3835]
Validation Epoch 141: 100%|██████████| 203/203 [00:01<00:00, 189.46it/s, loss=3.0685]



Epoch 141:
Average Train Loss: 3.8738
Average Val Loss: 3.8633


Training Epoch 142: 100%|██████████| 811/811 [00:09<00:00, 89.26it/s, loss=2.9955]
Validation Epoch 142: 100%|██████████| 203/203 [00:01<00:00, 189.84it/s, loss=4.6070]



Epoch 142:
Average Train Loss: 3.8889
Average Val Loss: 3.9402


Training Epoch 143: 100%|██████████| 811/811 [00:09<00:00, 86.69it/s, loss=3.5737]
Validation Epoch 143: 100%|██████████| 203/203 [00:01<00:00, 190.38it/s, loss=4.5147]



Epoch 143:
Average Train Loss: 3.8568
Average Val Loss: 3.7923


Training Epoch 144: 100%|██████████| 811/811 [00:09<00:00, 87.16it/s, loss=3.8936]
Validation Epoch 144: 100%|██████████| 203/203 [00:01<00:00, 185.86it/s, loss=4.2818]



Epoch 144:
Average Train Loss: 3.8476
Average Val Loss: 3.9008


Training Epoch 145: 100%|██████████| 811/811 [00:09<00:00, 86.79it/s, loss=3.1564]
Validation Epoch 145: 100%|██████████| 203/203 [00:01<00:00, 178.88it/s, loss=3.5725]



Epoch 145:
Average Train Loss: 3.8604
Average Val Loss: 3.8816


Training Epoch 146: 100%|██████████| 811/811 [00:09<00:00, 88.31it/s, loss=3.7965]
Validation Epoch 146: 100%|██████████| 203/203 [00:01<00:00, 176.03it/s, loss=4.1947]



Epoch 146:
Average Train Loss: 3.9019
Average Val Loss: 3.8887


Training Epoch 147: 100%|██████████| 811/811 [00:09<00:00, 87.47it/s, loss=4.3211]
Validation Epoch 147: 100%|██████████| 203/203 [00:01<00:00, 185.90it/s, loss=3.8802]



Epoch 147:
Average Train Loss: 3.8897
Average Val Loss: 3.9232


Training Epoch 148: 100%|██████████| 811/811 [00:09<00:00, 89.42it/s, loss=3.7845]
Validation Epoch 148: 100%|██████████| 203/203 [00:01<00:00, 181.68it/s, loss=3.7073]



Epoch 148:
Average Train Loss: 3.8938
Average Val Loss: 3.8731


Training Epoch 149: 100%|██████████| 811/811 [00:09<00:00, 86.69it/s, loss=4.6203]
Validation Epoch 149: 100%|██████████| 203/203 [00:01<00:00, 170.81it/s, loss=4.3907]



Epoch 149:
Average Train Loss: 3.8760
Average Val Loss: 3.9547


Training Epoch 150: 100%|██████████| 811/811 [00:09<00:00, 85.45it/s, loss=3.2732]
Validation Epoch 150: 100%|██████████| 203/203 [00:01<00:00, 177.41it/s, loss=3.9437]



Epoch 150:
Average Train Loss: 3.8804
Average Val Loss: 3.8954


Training Epoch 151: 100%|██████████| 811/811 [00:09<00:00, 86.40it/s, loss=4.5596]
Validation Epoch 151: 100%|██████████| 203/203 [00:01<00:00, 179.27it/s, loss=3.7554]



Epoch 151:
Average Train Loss: 3.8841
Average Val Loss: 3.9189


Training Epoch 152: 100%|██████████| 811/811 [00:09<00:00, 86.62it/s, loss=4.6418]
Validation Epoch 152: 100%|██████████| 203/203 [00:01<00:00, 170.01it/s, loss=4.6085]



Epoch 152:
Average Train Loss: 3.8707
Average Val Loss: 3.8344


Training Epoch 153: 100%|██████████| 811/811 [00:09<00:00, 84.66it/s, loss=2.5294]
Validation Epoch 153: 100%|██████████| 203/203 [00:01<00:00, 174.55it/s, loss=3.8901]



Epoch 153:
Average Train Loss: 3.8825
Average Val Loss: 3.9140


Training Epoch 154: 100%|██████████| 811/811 [00:09<00:00, 85.89it/s, loss=4.3365]
Validation Epoch 154: 100%|██████████| 203/203 [00:01<00:00, 181.73it/s, loss=4.7677]



Epoch 154:
Average Train Loss: 3.8656
Average Val Loss: 3.9245


Training Epoch 155: 100%|██████████| 811/811 [00:09<00:00, 86.01it/s, loss=3.8527]
Validation Epoch 155: 100%|██████████| 203/203 [00:01<00:00, 178.93it/s, loss=3.9135]



Epoch 155:
Average Train Loss: 3.8920
Average Val Loss: 3.7916


Training Epoch 156: 100%|██████████| 811/811 [00:09<00:00, 88.04it/s, loss=4.2875]
Validation Epoch 156: 100%|██████████| 203/203 [00:01<00:00, 177.12it/s, loss=4.2143]



Epoch 156:
Average Train Loss: 3.8904
Average Val Loss: 3.8540


Training Epoch 157: 100%|██████████| 811/811 [00:09<00:00, 87.99it/s, loss=4.6095]
Validation Epoch 157: 100%|██████████| 203/203 [00:01<00:00, 187.81it/s, loss=2.7668]



Epoch 157:
Average Train Loss: 3.8976
Average Val Loss: 3.8835


Training Epoch 158: 100%|██████████| 811/811 [00:09<00:00, 86.91it/s, loss=3.4788]
Validation Epoch 158: 100%|██████████| 203/203 [00:01<00:00, 184.35it/s, loss=4.6146]



Epoch 158:
Average Train Loss: 3.8514
Average Val Loss: 3.9245


Training Epoch 159: 100%|██████████| 811/811 [00:09<00:00, 85.54it/s, loss=4.2891]
Validation Epoch 159: 100%|██████████| 203/203 [00:01<00:00, 187.16it/s, loss=4.7037]



Epoch 159:
Average Train Loss: 3.8780
Average Val Loss: 3.9059


Training Epoch 160: 100%|██████████| 811/811 [00:09<00:00, 85.39it/s, loss=4.3586]
Validation Epoch 160: 100%|██████████| 203/203 [00:01<00:00, 171.54it/s, loss=4.3084]



Epoch 160:
Average Train Loss: 3.8627
Average Val Loss: 3.9072


Training Epoch 161: 100%|██████████| 811/811 [00:09<00:00, 87.95it/s, loss=4.3897]
Validation Epoch 161: 100%|██████████| 203/203 [00:01<00:00, 178.61it/s, loss=3.1120]



Epoch 161:
Average Train Loss: 3.8842
Average Val Loss: 3.8626


Training Epoch 162: 100%|██████████| 811/811 [00:09<00:00, 83.30it/s, loss=4.2267]
Validation Epoch 162: 100%|██████████| 203/203 [00:01<00:00, 176.67it/s, loss=3.9921]



Epoch 162:
Average Train Loss: 3.8512
Average Val Loss: 3.8644


Training Epoch 163: 100%|██████████| 811/811 [00:09<00:00, 81.83it/s, loss=4.1452]
Validation Epoch 163: 100%|██████████| 203/203 [00:01<00:00, 172.74it/s, loss=4.4357]



Epoch 163:
Average Train Loss: 3.8778
Average Val Loss: 3.9077


Training Epoch 164: 100%|██████████| 811/811 [00:09<00:00, 84.11it/s, loss=4.7429]
Validation Epoch 164: 100%|██████████| 203/203 [00:01<00:00, 185.55it/s, loss=4.7145]



Epoch 164:
Average Train Loss: 3.8688
Average Val Loss: 3.8833


Training Epoch 165: 100%|██████████| 811/811 [00:09<00:00, 85.73it/s, loss=4.2181]
Validation Epoch 165: 100%|██████████| 203/203 [00:01<00:00, 194.83it/s, loss=3.4187]



Epoch 165:
Average Train Loss: 3.9205
Average Val Loss: 3.8289


Training Epoch 166: 100%|██████████| 811/811 [00:09<00:00, 87.73it/s, loss=3.4387]
Validation Epoch 166: 100%|██████████| 203/203 [00:01<00:00, 181.43it/s, loss=3.3197]



Epoch 166:
Average Train Loss: 3.9035
Average Val Loss: 3.8908


Training Epoch 167: 100%|██████████| 811/811 [00:09<00:00, 85.55it/s, loss=4.0927]
Validation Epoch 167: 100%|██████████| 203/203 [00:01<00:00, 177.31it/s, loss=4.0586]



Epoch 167:
Average Train Loss: 3.8770
Average Val Loss: 3.8587


Training Epoch 168: 100%|██████████| 811/811 [00:09<00:00, 86.64it/s, loss=3.8604]
Validation Epoch 168: 100%|██████████| 203/203 [00:01<00:00, 191.69it/s, loss=4.1145]



Epoch 168:
Average Train Loss: 3.8686
Average Val Loss: 3.9085


Training Epoch 169: 100%|██████████| 811/811 [00:09<00:00, 86.79it/s, loss=2.9642]
Validation Epoch 169: 100%|██████████| 203/203 [00:01<00:00, 185.26it/s, loss=3.7285]



Epoch 169:
Average Train Loss: 3.8804
Average Val Loss: 3.8954


Training Epoch 170: 100%|██████████| 811/811 [00:09<00:00, 86.22it/s, loss=3.3780]
Validation Epoch 170: 100%|██████████| 203/203 [00:01<00:00, 180.20it/s, loss=3.0032]



Epoch 170:
Average Train Loss: 3.8933
Average Val Loss: 3.9051


Training Epoch 171: 100%|██████████| 811/811 [00:09<00:00, 85.80it/s, loss=4.5308]
Validation Epoch 171: 100%|██████████| 203/203 [00:01<00:00, 183.47it/s, loss=4.1333]



Epoch 171:
Average Train Loss: 3.8761
Average Val Loss: 3.8818


Training Epoch 172: 100%|██████████| 811/811 [00:09<00:00, 84.58it/s, loss=4.7445]
Validation Epoch 172: 100%|██████████| 203/203 [00:01<00:00, 179.47it/s, loss=4.5211]



Epoch 172:
Average Train Loss: 3.8861
Average Val Loss: 3.9168


Training Epoch 173: 100%|██████████| 811/811 [00:09<00:00, 89.66it/s, loss=3.6185]
Validation Epoch 173: 100%|██████████| 203/203 [00:01<00:00, 181.12it/s, loss=3.3894]



Epoch 173:
Average Train Loss: 3.8667
Average Val Loss: 3.8246


Training Epoch 174: 100%|██████████| 811/811 [00:09<00:00, 89.04it/s, loss=4.6741]
Validation Epoch 174: 100%|██████████| 203/203 [00:01<00:00, 191.56it/s, loss=3.6467]



Epoch 174:
Average Train Loss: 3.9043
Average Val Loss: 3.8836


Training Epoch 175: 100%|██████████| 811/811 [00:09<00:00, 85.54it/s, loss=3.6760]
Validation Epoch 175: 100%|██████████| 203/203 [00:01<00:00, 174.33it/s, loss=3.6468]



Epoch 175:
Average Train Loss: 3.8640
Average Val Loss: 3.8464


Training Epoch 176: 100%|██████████| 811/811 [00:09<00:00, 86.57it/s, loss=4.2955]
Validation Epoch 176: 100%|██████████| 203/203 [00:01<00:00, 183.44it/s, loss=2.6205]



Epoch 176:
Average Train Loss: 3.8955
Average Val Loss: 3.9064


Training Epoch 177: 100%|██████████| 811/811 [00:09<00:00, 83.80it/s, loss=3.4938]
Validation Epoch 177: 100%|██████████| 203/203 [00:01<00:00, 176.71it/s, loss=4.2287]



Epoch 177:
Average Train Loss: 3.8919
Average Val Loss: 3.8247


Training Epoch 178: 100%|██████████| 811/811 [00:09<00:00, 85.37it/s, loss=4.1985]
Validation Epoch 178: 100%|██████████| 203/203 [00:01<00:00, 177.91it/s, loss=4.0960]



Epoch 178:
Average Train Loss: 3.8562
Average Val Loss: 3.8580


Training Epoch 179: 100%|██████████| 811/811 [00:09<00:00, 84.48it/s, loss=3.4936]
Validation Epoch 179: 100%|██████████| 203/203 [00:01<00:00, 176.71it/s, loss=2.8378]



Epoch 179:
Average Train Loss: 3.8700
Average Val Loss: 3.8707


Training Epoch 180: 100%|██████████| 811/811 [00:09<00:00, 84.14it/s, loss=3.5454]
Validation Epoch 180: 100%|██████████| 203/203 [00:01<00:00, 178.32it/s, loss=3.4626]



Epoch 180:
Average Train Loss: 3.8831
Average Val Loss: 3.8483


Training Epoch 181: 100%|██████████| 811/811 [00:09<00:00, 83.72it/s, loss=4.3785]
Validation Epoch 181: 100%|██████████| 203/203 [00:01<00:00, 178.66it/s, loss=4.5858]



Epoch 181:
Average Train Loss: 3.8163
Average Val Loss: 3.9877


Training Epoch 182: 100%|██████████| 811/811 [00:09<00:00, 85.14it/s, loss=3.1930]
Validation Epoch 182: 100%|██████████| 203/203 [00:01<00:00, 177.56it/s, loss=3.6537]



Epoch 182:
Average Train Loss: 3.8873
Average Val Loss: 3.8995


Training Epoch 183: 100%|██████████| 811/811 [00:09<00:00, 85.32it/s, loss=2.2859]
Validation Epoch 183: 100%|██████████| 203/203 [00:01<00:00, 175.82it/s, loss=4.2269]



Epoch 183:
Average Train Loss: 3.8982
Average Val Loss: 3.8573


Training Epoch 184: 100%|██████████| 811/811 [00:09<00:00, 84.07it/s, loss=4.4881]
Validation Epoch 184: 100%|██████████| 203/203 [00:01<00:00, 182.91it/s, loss=4.5055]



Epoch 184:
Average Train Loss: 3.8623
Average Val Loss: 3.8336


Training Epoch 185: 100%|██████████| 811/811 [00:09<00:00, 85.74it/s, loss=4.6790]
Validation Epoch 185: 100%|██████████| 203/203 [00:01<00:00, 187.54it/s, loss=3.6728]



Epoch 185:
Average Train Loss: 3.9050
Average Val Loss: 3.8890


Training Epoch 186: 100%|██████████| 811/811 [00:09<00:00, 87.93it/s, loss=4.7525]
Validation Epoch 186: 100%|██████████| 203/203 [00:01<00:00, 174.93it/s, loss=4.4507]



Epoch 186:
Average Train Loss: 3.8711
Average Val Loss: 3.8862


Training Epoch 187: 100%|██████████| 811/811 [00:09<00:00, 88.84it/s, loss=4.2235]
Validation Epoch 187: 100%|██████████| 203/203 [00:01<00:00, 182.16it/s, loss=3.8993]



Epoch 187:
Average Train Loss: 3.8661
Average Val Loss: 3.9075


Training Epoch 188: 100%|██████████| 811/811 [00:09<00:00, 87.05it/s, loss=3.7993]
Validation Epoch 188: 100%|██████████| 203/203 [00:01<00:00, 180.81it/s, loss=2.5310]



Epoch 188:
Average Train Loss: 3.8956
Average Val Loss: 3.9386


Training Epoch 189: 100%|██████████| 811/811 [00:09<00:00, 86.67it/s, loss=4.6243]
Validation Epoch 189: 100%|██████████| 203/203 [00:01<00:00, 183.08it/s, loss=4.3165]



Epoch 189:
Average Train Loss: 3.8651
Average Val Loss: 3.7987


Training Epoch 190: 100%|██████████| 811/811 [00:09<00:00, 89.00it/s, loss=4.1118]
Validation Epoch 190: 100%|██████████| 203/203 [00:01<00:00, 182.94it/s, loss=3.3500]



Epoch 190:
Average Train Loss: 3.8750
Average Val Loss: 3.8976


Training Epoch 191: 100%|██████████| 811/811 [00:09<00:00, 87.71it/s, loss=4.3467]
Validation Epoch 191: 100%|██████████| 203/203 [00:01<00:00, 179.93it/s, loss=2.5556]



Epoch 191:
Average Train Loss: 3.8557
Average Val Loss: 3.8850


Training Epoch 192: 100%|██████████| 811/811 [00:09<00:00, 87.63it/s, loss=4.2664]
Validation Epoch 192: 100%|██████████| 203/203 [00:01<00:00, 181.99it/s, loss=3.2308]



Epoch 192:
Average Train Loss: 3.8603
Average Val Loss: 3.8588


Training Epoch 193: 100%|██████████| 811/811 [00:09<00:00, 89.11it/s, loss=4.5237]
Validation Epoch 193: 100%|██████████| 203/203 [00:01<00:00, 180.11it/s, loss=3.7034]



Epoch 193:
Average Train Loss: 3.8745
Average Val Loss: 3.9182


Training Epoch 194: 100%|██████████| 811/811 [00:09<00:00, 90.05it/s, loss=3.7802]
Validation Epoch 194: 100%|██████████| 203/203 [00:01<00:00, 191.62it/s, loss=3.7989]



Epoch 194:
Average Train Loss: 3.8587
Average Val Loss: 3.8663


Training Epoch 195: 100%|██████████| 811/811 [00:09<00:00, 90.07it/s, loss=4.1035]
Validation Epoch 195: 100%|██████████| 203/203 [00:01<00:00, 189.17it/s, loss=4.4527]



Epoch 195:
Average Train Loss: 3.8769
Average Val Loss: 3.9413


Training Epoch 196: 100%|██████████| 811/811 [00:08<00:00, 90.85it/s, loss=3.7245]
Validation Epoch 196: 100%|██████████| 203/203 [00:01<00:00, 194.13it/s, loss=4.1145]



Epoch 196:
Average Train Loss: 3.8859
Average Val Loss: 3.8887


Training Epoch 197: 100%|██████████| 811/811 [00:09<00:00, 88.85it/s, loss=4.7416]
Validation Epoch 197: 100%|██████████| 203/203 [00:01<00:00, 180.34it/s, loss=4.4881]



Epoch 197:
Average Train Loss: 3.8926
Average Val Loss: 3.9528


Training Epoch 198: 100%|██████████| 811/811 [00:09<00:00, 87.45it/s, loss=3.8179]
Validation Epoch 198: 100%|██████████| 203/203 [00:01<00:00, 193.10it/s, loss=3.2115]



Epoch 198:
Average Train Loss: 3.8997
Average Val Loss: 3.9000


Training Epoch 199: 100%|██████████| 811/811 [00:08<00:00, 90.30it/s, loss=2.6743]
Validation Epoch 199: 100%|██████████| 203/203 [00:01<00:00, 194.05it/s, loss=4.3213]


Epoch 199:
Average Train Loss: 3.8827
Average Val Loss: 3.9108





In [11]:
checkpoint = {
    'model_state_dict': model.state_dict(),
    'optimizer_state_dict': optimizer.state_dict(),
    'model_args': {
        'vocab_size': vocab_size,
        'embed_dim': embed_dim,
        'num_layers': num_layers,
        'num_heads': num_heads,
        'dim_feedforward': dim_feedforward,
        'num_fourier_features': num_fourier_features
    }
}

torch.save(checkpoint, 'model_checkpoint_3.96.pt')

In [None]:
from model_arch import CategoricalScoreDiffusion

checkpoint = torch.load('model_checkpoint_2.65.pt')
model = CategoricalScoreDiffusion(**checkpoint['model_args'])
model.load_state_dict(checkpoint['model_state_dict'])
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])

In [None]:
# Access the learning rate
# Get the optimizer state dict
optimizer_state = checkpoint['optimizer_state_dict']
learning_rate = optimizer_state['param_groups'][0]['lr']
print(f"Learning rate: {learning_rate}")

Learning rate: 0.001


In [None]:
import time
from contextlib import contextmanager

@contextmanager
def timer(name):
    start = time.perf_counter()
    yield
    end = time.perf_counter()
    print(f"{name}: {(end - start)*1000:.2f} ms")