In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
import numpy as np
from scipy.optimize import linear_sum_assignment
import torch
import torch.nn as nn
import torch.nn.functional as F
from tqdm import tqdm
from collections import Counter

from helper_funcs import generate_sequences


In [2]:
import wandb
run = wandb.init()
artifact = run.use_artifact('matteopeluso1922/cdcd-hmp-param-search-orion_truewarp/best_model_aiicxkad:v0', type='model')
artifact_dir = artifact.download()

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33mmatteopeluso1922[0m. Use [1m`wandb login --relogin`[0m to force relogin


[34m[1mwandb[0m:   1 of 1 files downloaded.  


In [3]:
# Load data
loaded_df = pd.read_hdf('./data/sample_otu_arrays.h5', key='df')

# Set random seed
np.random.seed(42)

# Split indices into train/test
train_idx, test_idx = train_test_split(loaded_df.index, test_size=0.2, random_state=42)

# Create train and test dataframes
train_df = loaded_df.loc[train_idx]
test_df = loaded_df.loc[test_idx]

print(f"Train size: {len(train_df)}")
print(f"Test size: {len(test_df)}")
print("\nFirst few training samples:")
print(train_df.head())

# Let's also look at array lengths
array_lengths = [len(x) for x in loaded_df['otu_arrays']]
print(f"\nMin array length: {min(array_lengths)}")
print(f"Max array length: {max(array_lengths)}")
print(f"Mean array length: {np.mean(array_lengths):.2f}")

Train size: 6486
Test size: 1622

First few training samples:
                                                            otu_arrays
Unnamed: 0                                                            
SRR044975.SRS011167  [30, 58, 82, 89, 93, 98, 99, 104, 117, 120, 12...
SRR049604.SRS049164  [9, 10, 11, 14, 15, 16, 17, 20, 28, 30, 31, 32...
SRR331714.SRS076947  [19, 30, 43, 58, 65, 70, 71, 74, 80, 90, 92, 9...
SRR089999.SRS077685  [12, 14, 18, 20, 22, 38, 45, 67, 68, 76, 88, 1...
SRR048091.SRS021563  [19, 30, 45, 52, 58, 60, 65, 70, 74, 80, 90, 9...

Min array length: 3
Max array length: 277
Mean array length: 69.10


In [4]:
import torch
from torch.utils.data import Dataset, DataLoader

class OTUDataset(Dataset):
   def __init__(self, df):
       self.df = df
       
       # Find max sequence length for padding
       self.max_len = max(len(x) for x in df['otu_arrays'])
       
   def __len__(self):
       return len(self.df)
   
   def __getitem__(self, idx):
       # Get array for this sample
       array = self.df.iloc[idx]['otu_arrays']
       
       # Create padded tensor
       padded = torch.zeros(self.max_len, dtype=torch.long)
       padded[:len(array)] = torch.tensor(array)
       
       # Create mask (False where we have real tokens, True for padding)
       mask = torch.zeros(self.max_len, dtype=torch.bool)
       mask[len(array):] = True
       
       return padded, mask

# Create datasets
train_dataset = OTUDataset(train_df)
test_dataset = OTUDataset(test_df)

# Create dataloaders
batch_size = 8
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# Verify shapes
for tokens, mask in train_loader:
   print(f"Batch tokens shape: {tokens.shape}")
   print(f"Batch mask shape: {mask.shape}")

   break

# Get vocab size (maximum token ID + 1 for padding)
vocab_size = max(max(x) for x in loaded_df['otu_arrays']) + 1
print(f"\nVocabulary size: {vocab_size}")

Batch tokens shape: torch.Size([8, 277])
Batch mask shape: torch.Size([8, 277])

Vocabulary size: 519


In [5]:
import model_arch
import helper_funcs
import importlib
from model_arch import CategoricalScoreDiffusion
from helper_funcs import generate_sequences
importlib.reload(model_arch)
importlib.reload(helper_funcs)

<module 'helper_funcs' from '/mnt/mnemo9/mpelus/matlas/cdcd_multi_train/cdcd_hmp/helper_funcs.py'>

In [6]:

class TrainingMetrics:
    def __init__(self):
        self.best_val_loss = float('inf')

        
    def update_best_metrics(self, val_loss):
        improved = False
        if val_loss < self.best_val_loss:
            self.best_val_loss = val_loss
            improved = True
        return improved

def train_step(model, tokens, mask, optimizer, device):
    optimizer.zero_grad()
    
    # Sample time using warping
    t = model.sample_time(tokens.shape[0], tokens.device)

    # Get clean embeddings
    x0 = model.embedding(tokens)
  
    
    # Add noise
    noise = model.get_noise(x0, t)

    xt = x0 + noise

    
    # Get model predictions
    logits = model(xt, mask, t)

    
    # Compute loss
    loss = F.cross_entropy(
        logits.view(-1, logits.size(-1)),
        tokens.view(-1),
        ignore_index=0
    )

    if not torch.isnan(loss):
        model.update_time_warping(t, loss.detach())
        loss.backward()
        optimizer.step()
    
    return loss.item()

def validation_step(model, tokens, mask, device):
    # Sample time using warping
    t = model.sample_time(tokens.shape[0], tokens.device)
    
    # Get clean embeddings
    x0 = model.embedding(tokens)
    
    # Add noise according to N(0, σt²)
    noise = model.get_noise(x0, t)
    xt = x0 + noise
    
    # Get model predictions
    logits = model(xt, mask, t)
    
    # Compute cross-entropy loss with padding handling
    loss = F.cross_entropy(
        logits.view(-1, logits.size(-1)),
        tokens.view(-1),
        ignore_index=0  # Assuming 0 is padding token
    )
    
    return loss.item()

def save_checkpoint(model, optimizer, scheduler, epoch, train_loss, val_loss):
    checkpoint = {
        'epoch': epoch,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'scheduler_state_dict': scheduler.state_dict() if scheduler else None,
        'train_loss': train_loss,
        'val_loss': val_loss,
        
    }
    torch.save(checkpoint, 'best_model.pt')

def log_metrics(metrics_dict, step_type='batch'):
    wandb.log(metrics_dict)

def train_epoch(model, train_loader, optimizer, device, epoch):
    model.train()
    train_loss = 0
    train_bar = tqdm(train_loader, desc=f'Training Epoch {epoch}')
    
    for batch_idx, (tokens, mask) in enumerate(train_bar):
        tokens = tokens.to(device)
        mask = mask.to(device)
        
        loss = train_step(model, tokens, mask, optimizer, device)
        train_loss += loss
        
        train_bar.set_postfix({'loss': f'{loss:.4f}'})
        log_metrics({
            'train/batch_loss': loss,
            'train/learning_rate': optimizer.param_groups[0]['lr'],
            'epoch': epoch,
            'batch': batch_idx
        })
    
    return train_loss / len(train_loader)

def validate_epoch(model, test_loader, device, epoch):
    model.eval()
    val_loss = 0
    val_bar = tqdm(test_loader, desc=f'Validation Epoch {epoch}')
    
    # Collect real sequences
    real_sequences = []
    with torch.no_grad():
        for tokens, mask in val_bar:
            tokens = tokens.to(device)
            mask = mask.to(device)
            
            loss = validation_step(model, tokens, mask, device)
            val_loss += loss
            val_bar.set_postfix({'loss': f'{loss:.4f}'})
            
            real_sequences.extend([seq[seq != 0].cpu().numpy() for seq in tokens])

    
    return val_loss / len(test_loader)



def train_and_validate(model, train_loader, test_loader, optimizer, num_epochs, device, use_lr_scheduling=True):
    metrics = TrainingMetrics()
    
    scheduler = None
    if use_lr_scheduling:
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            optimizer, mode='min', patience=3, factor=0.5, verbose=True
        )
    
    for epoch in range(num_epochs):
        # Training phase
        avg_train_loss = train_epoch(model, train_loader, optimizer, device, epoch)
        log_metrics({'train/epoch_loss': avg_train_loss, 'epoch': epoch})
         
        # Validation phase (every 5 epochs)
        if epoch % 1 == 0:
            avg_val_loss = validate_epoch(model, test_loader, device, epoch)
            
            log_metrics({
                'val/epoch_loss': avg_val_loss,
                'epoch': epoch
            })
            
            print(f'\nEpoch {epoch}:')
            print(f'Average Train Loss: {avg_train_loss:.4f}')
            print(f'Average Val Loss: {avg_val_loss:.4f}')
         
            
            if scheduler:
                scheduler.step(avg_val_loss)
            
            if metrics.update_best_metrics(avg_val_loss):
                save_checkpoint(model, optimizer, scheduler, epoch, avg_train_loss, avg_val_loss)
                log_metrics({
                    'best_model/val_loss': avg_val_loss,
                    'best_model/train_loss': avg_train_loss,
                    'best_model/epoch': epoch
                })
        else:
            print(f'\nEpoch {epoch}: Average Train Loss: {avg_train_loss:.4f}\n')


def train_step(model, tokens, mask, optimizer, device):
    optimizer.zero_grad()
    
    t = model.sample_time(tokens.shape[0], tokens.device)
    
    # Get bin assignments and importance weights
    bin_idx = model.time_warping.get_bin_assignment(t)
    importance_weights = model.time_warping.get_importance_weights(bin_idx)
    
    x0 = model.embedding(tokens)
    noise = model.get_noise(x0, t)
    xt = x0 + noise
    logits = model(xt, mask, t)
    
    loss = F.cross_entropy(
        logits.view(-1, logits.size(-1)),
        tokens.view(-1),
        ignore_index=0
    )

    if not torch.isnan(loss):
        # Collect statistics for time warping
        model.time_warping.collect_statistics(t, loss.detach().expand(tokens.shape[0]))
        
        # Apply importance weights to loss
        weighted_loss = loss * importance_weights.mean()
        weighted_loss.backward()
        optimizer.step()
    
    return loss.item()

def train_epoch(model, train_loader, optimizer, device, epoch):
    model.train()
    train_loss = 0
    train_bar = tqdm(train_loader, desc=f'Training Epoch {epoch}')
    
    for batch_idx, (tokens, mask) in enumerate(train_bar):
        tokens = tokens.to(device)
        mask = mask.to(device)
        
        loss = train_step(model, tokens, mask, optimizer, device)
        train_loss += loss
        train_bar.set_postfix({'loss': f'{loss:.4f}'})
        
        log_metrics({
            'train/batch_loss': loss,
            'train/learning_rate': optimizer.param_groups[0]['lr'],
            'epoch': epoch,
            'batch': batch_idx
        })
    
    # Update time warping at end of epoch using accumulated statistics
    model.time_warping.update_warping()
    
    return train_loss / len(train_loader)


In [14]:
checkpoint = torch.load(f"/mnt/mnemo9/mpelus/matlas/cdcd_multi_train/cdcd_hmp/artifacts/best_model_aiicxkad:v0/tmp21qdlph1.pt")
# Initialize model with the same parameters you showed
embed_dim = 96
num_layers = 4
num_heads = 12
dim_feedforward = 28
num_fourier_features = 4

model = CategoricalScoreDiffusion(
    vocab_size=vocab_size,
    embed_dim=embed_dim,
    num_layers=num_layers,
    num_heads=num_heads,
    dim_feedforward=dim_feedforward,
    num_fourier_features=num_fourier_features
)


model.load_state_dict(checkpoint)

device = 'cuda' if torch.cuda.is_available() else 'cpu'
# Move model to device
model = model.to(device)

  checkpoint = torch.load(f"/mnt/mnemo9/mpelus/matlas/cdcd_multi_train/cdcd_hmp/artifacts/best_model_aiicxkad:v0/tmp21qdlph1.pt")


In [12]:
# Initialize model
embed_dim =200 #8 
num_layers = 1 #5
num_heads = 20
dim_feedforward = 50 #32
num_fourier_features = 16# going from 4 to 8 destabilised the batch loss but seems o have resulted in a faster convergence and lower
model = CategoricalScoreDiffusion(
    vocab_size=vocab_size,
    embed_dim=embed_dim,
    num_layers=num_layers,
    num_heads=num_heads,
    dim_feedforward=dim_feedforward,
    num_fourier_features=num_fourier_features
    
)

device = 'cuda' if torch.cuda.is_available() else 'cpu'
# Move model to device
model = model.to(device)


In [11]:
import wandb
num_epochs = 200
learning_rate = 0.001137283411547948

wandb.finish()
wandb.init(
    project="diffusion-hmp",
    config={
        "learning_rate": learning_rate,
        "architecture": "restart",
        "dataset": "hmp",
        "epochs": num_epochs,
        "embed_dim": embed_dim,
        "num_layers": num_layers,
        "num_heads": num_heads,
        "dim_feedforward": dim_feedforward,
        "vocab_size": vocab_size,
        "num_fourier_features":num_fourier_features
    }
)

In [15]:
# Training parameters
# Initialize optimizer
optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate)
# Start training
train_and_validate(model, train_loader, test_loader, optimizer, num_epochs, device)

Training Epoch 0: 100%|██████████| 811/811 [00:11<00:00, 73.53it/s, loss=4.5485]
  output = torch._nested_tensor_from_mask(
Validation Epoch 0: 100%|██████████| 203/203 [00:01<00:00, 174.13it/s, loss=3.1994]



Epoch 0:
Average Train Loss: 3.9258
Average Val Loss: 3.8820


Training Epoch 1: 100%|██████████| 811/811 [00:10<00:00, 74.95it/s, loss=4.5895]
Validation Epoch 1: 100%|██████████| 203/203 [00:01<00:00, 184.26it/s, loss=3.8889]



Epoch 1:
Average Train Loss: 3.8759
Average Val Loss: 3.8146


Training Epoch 2: 100%|██████████| 811/811 [00:12<00:00, 63.38it/s, loss=4.3547]
Validation Epoch 2: 100%|██████████| 203/203 [00:01<00:00, 184.48it/s, loss=4.7084]



Epoch 2:
Average Train Loss: 3.9026
Average Val Loss: 3.8213


Training Epoch 3: 100%|██████████| 811/811 [00:11<00:00, 70.99it/s, loss=3.9107]
Validation Epoch 3: 100%|██████████| 203/203 [00:01<00:00, 180.68it/s, loss=4.3766]



Epoch 3:
Average Train Loss: 3.8847
Average Val Loss: 3.8348


Training Epoch 4: 100%|██████████| 811/811 [00:11<00:00, 70.72it/s, loss=3.5541]
Validation Epoch 4: 100%|██████████| 203/203 [00:01<00:00, 181.39it/s, loss=2.9296]



Epoch 4:
Average Train Loss: 3.8647
Average Val Loss: 3.8698


Training Epoch 5: 100%|██████████| 811/811 [00:12<00:00, 65.62it/s, loss=2.5681]
Validation Epoch 5: 100%|██████████| 203/203 [00:01<00:00, 183.01it/s, loss=4.7860]



Epoch 5:
Average Train Loss: 3.9051
Average Val Loss: 3.8930


Training Epoch 6: 100%|██████████| 811/811 [00:13<00:00, 60.99it/s, loss=4.2725]
Validation Epoch 6: 100%|██████████| 203/203 [00:01<00:00, 183.73it/s, loss=3.9994]



Epoch 6:
Average Train Loss: 3.8666
Average Val Loss: 3.9342


Training Epoch 7: 100%|██████████| 811/811 [00:13<00:00, 60.52it/s, loss=4.6578]
Validation Epoch 7: 100%|██████████| 203/203 [00:01<00:00, 185.40it/s, loss=4.1456]



Epoch 7:
Average Train Loss: 3.8734
Average Val Loss: 3.8784


Training Epoch 8: 100%|██████████| 811/811 [00:12<00:00, 63.72it/s, loss=4.5266]
Validation Epoch 8: 100%|██████████| 203/203 [00:01<00:00, 184.07it/s, loss=4.6800]



Epoch 8:
Average Train Loss: 3.8634
Average Val Loss: 3.8250


Training Epoch 9: 100%|██████████| 811/811 [00:13<00:00, 60.76it/s, loss=3.8787]
Validation Epoch 9: 100%|██████████| 203/203 [00:01<00:00, 184.24it/s, loss=3.4765]



Epoch 9:
Average Train Loss: 3.8674
Average Val Loss: 3.8179


Training Epoch 10: 100%|██████████| 811/811 [00:13<00:00, 61.97it/s, loss=4.0235]
Validation Epoch 10: 100%|██████████| 203/203 [00:01<00:00, 183.49it/s, loss=2.8154]



Epoch 10:
Average Train Loss: 3.8461
Average Val Loss: 3.7798


Training Epoch 11: 100%|██████████| 811/811 [00:11<00:00, 69.67it/s, loss=4.6426]
Validation Epoch 11: 100%|██████████| 203/203 [00:01<00:00, 183.55it/s, loss=3.2636]



Epoch 11:
Average Train Loss: 3.8787
Average Val Loss: 3.7985


Training Epoch 12: 100%|██████████| 811/811 [00:13<00:00, 60.66it/s, loss=4.2473]
Validation Epoch 12: 100%|██████████| 203/203 [00:01<00:00, 182.65it/s, loss=3.5135]



Epoch 12:
Average Train Loss: 3.8375
Average Val Loss: 3.7483


Training Epoch 13: 100%|██████████| 811/811 [00:12<00:00, 62.81it/s, loss=2.5409]
Validation Epoch 13: 100%|██████████| 203/203 [00:01<00:00, 183.99it/s, loss=4.5002]



Epoch 13:
Average Train Loss: 3.8453
Average Val Loss: 3.8353


Training Epoch 14: 100%|██████████| 811/811 [00:09<00:00, 81.37it/s, loss=3.8765]
Validation Epoch 14: 100%|██████████| 203/203 [00:01<00:00, 185.66it/s, loss=3.8212]



Epoch 14:
Average Train Loss: 3.8364
Average Val Loss: 3.8200


Training Epoch 15: 100%|██████████| 811/811 [00:12<00:00, 67.18it/s, loss=2.7300]
Validation Epoch 15: 100%|██████████| 203/203 [00:01<00:00, 183.21it/s, loss=3.3378]



Epoch 15:
Average Train Loss: 3.8849
Average Val Loss: 3.8676


Training Epoch 16: 100%|██████████| 811/811 [00:10<00:00, 77.13it/s, loss=3.5352]
Validation Epoch 16: 100%|██████████| 203/203 [00:01<00:00, 183.27it/s, loss=3.8891]



Epoch 16:
Average Train Loss: 3.8363
Average Val Loss: 3.8753


Training Epoch 17: 100%|██████████| 811/811 [00:11<00:00, 71.42it/s, loss=3.1731]
Validation Epoch 17: 100%|██████████| 203/203 [00:01<00:00, 185.62it/s, loss=3.1386]



Epoch 17:
Average Train Loss: 3.8283
Average Val Loss: 3.8791


Training Epoch 18: 100%|██████████| 811/811 [00:10<00:00, 77.72it/s, loss=3.3496]
Validation Epoch 18: 100%|██████████| 203/203 [00:01<00:00, 186.12it/s, loss=4.2048]



Epoch 18:
Average Train Loss: 3.8511
Average Val Loss: 3.8312


Training Epoch 19: 100%|██████████| 811/811 [00:11<00:00, 70.92it/s, loss=4.0455]
Validation Epoch 19: 100%|██████████| 203/203 [00:01<00:00, 170.46it/s, loss=3.6602]



Epoch 19:
Average Train Loss: 3.8583
Average Val Loss: 3.8642


Training Epoch 20: 100%|██████████| 811/811 [00:10<00:00, 78.74it/s, loss=4.0460]
Validation Epoch 20: 100%|██████████| 203/203 [00:01<00:00, 146.47it/s, loss=4.7962]



Epoch 20:
Average Train Loss: 3.8193
Average Val Loss: 3.8676


Training Epoch 21: 100%|██████████| 811/811 [00:10<00:00, 78.81it/s, loss=4.4692]
Validation Epoch 21: 100%|██████████| 203/203 [00:01<00:00, 173.07it/s, loss=4.4298]



Epoch 21:
Average Train Loss: 3.8332
Average Val Loss: 3.7898


Training Epoch 22: 100%|██████████| 811/811 [00:10<00:00, 78.92it/s, loss=2.7967]
Validation Epoch 22: 100%|██████████| 203/203 [00:01<00:00, 172.61it/s, loss=4.2797]



Epoch 22:
Average Train Loss: 3.8437
Average Val Loss: 3.8735


Training Epoch 23: 100%|██████████| 811/811 [00:10<00:00, 78.81it/s, loss=4.5054]
Validation Epoch 23: 100%|██████████| 203/203 [00:01<00:00, 174.89it/s, loss=4.3104]



Epoch 23:
Average Train Loss: 3.8524
Average Val Loss: 3.8280


Training Epoch 24: 100%|██████████| 811/811 [00:10<00:00, 77.91it/s, loss=4.1421]
Validation Epoch 24: 100%|██████████| 203/203 [00:01<00:00, 183.52it/s, loss=4.3667]



Epoch 24:
Average Train Loss: 3.8523
Average Val Loss: 3.8188


Training Epoch 25: 100%|██████████| 811/811 [00:09<00:00, 85.15it/s, loss=4.4832]
Validation Epoch 25: 100%|██████████| 203/203 [00:01<00:00, 184.17it/s, loss=4.0989]



Epoch 25:
Average Train Loss: 3.8366
Average Val Loss: 3.8239


Training Epoch 26: 100%|██████████| 811/811 [00:10<00:00, 79.87it/s, loss=4.1337]
Validation Epoch 26: 100%|██████████| 203/203 [00:01<00:00, 184.85it/s, loss=2.9651]



Epoch 26:
Average Train Loss: 3.8408
Average Val Loss: 3.8458


Training Epoch 27: 100%|██████████| 811/811 [00:10<00:00, 73.90it/s, loss=3.5406]
Validation Epoch 27: 100%|██████████| 203/203 [00:01<00:00, 170.17it/s, loss=3.9701]



Epoch 27:
Average Train Loss: 3.8285
Average Val Loss: 3.8564


Training Epoch 28: 100%|██████████| 811/811 [00:13<00:00, 59.64it/s, loss=4.3205]
Validation Epoch 28: 100%|██████████| 203/203 [00:01<00:00, 170.84it/s, loss=4.1135]



Epoch 28:
Average Train Loss: 3.8072
Average Val Loss: 3.7506


Training Epoch 29: 100%|██████████| 811/811 [00:13<00:00, 60.09it/s, loss=2.8311]
Validation Epoch 29: 100%|██████████| 203/203 [00:01<00:00, 172.63it/s, loss=3.8531]



Epoch 29:
Average Train Loss: 3.8246
Average Val Loss: 3.7915


Training Epoch 30: 100%|██████████| 811/811 [00:13<00:00, 59.48it/s, loss=3.8341]
Validation Epoch 30: 100%|██████████| 203/203 [00:01<00:00, 172.20it/s, loss=3.7551]



Epoch 30:
Average Train Loss: 3.8340
Average Val Loss: 3.8422


Training Epoch 31: 100%|██████████| 811/811 [00:13<00:00, 60.42it/s, loss=3.0803]
Validation Epoch 31: 100%|██████████| 203/203 [00:01<00:00, 183.71it/s, loss=4.0813]



Epoch 31:
Average Train Loss: 3.8159
Average Val Loss: 3.8656


Training Epoch 32: 100%|██████████| 811/811 [00:13<00:00, 61.86it/s, loss=3.7958]
Validation Epoch 32: 100%|██████████| 203/203 [00:01<00:00, 184.25it/s, loss=2.8175]



Epoch 32:
Average Train Loss: 3.8335
Average Val Loss: 3.8272


Training Epoch 33: 100%|██████████| 811/811 [00:12<00:00, 65.30it/s, loss=3.8740]
Validation Epoch 33: 100%|██████████| 203/203 [00:01<00:00, 183.93it/s, loss=4.4043]



Epoch 33:
Average Train Loss: 3.8058
Average Val Loss: 3.8467


Training Epoch 34: 100%|██████████| 811/811 [00:12<00:00, 62.39it/s, loss=2.6885]
Validation Epoch 34: 100%|██████████| 203/203 [00:01<00:00, 175.90it/s, loss=4.3223]



Epoch 34:
Average Train Loss: 3.8196
Average Val Loss: 3.8575


Training Epoch 35: 100%|██████████| 811/811 [00:13<00:00, 60.27it/s, loss=4.6486]
Validation Epoch 35: 100%|██████████| 203/203 [00:01<00:00, 183.59it/s, loss=4.2404]



Epoch 35:
Average Train Loss: 3.8013
Average Val Loss: 3.7576


Training Epoch 36: 100%|██████████| 811/811 [00:13<00:00, 59.36it/s, loss=3.8867]
Validation Epoch 36: 100%|██████████| 203/203 [00:01<00:00, 183.80it/s, loss=4.7291]



Epoch 36:
Average Train Loss: 3.8317
Average Val Loss: 3.8223


Training Epoch 37: 100%|██████████| 811/811 [00:13<00:00, 61.66it/s, loss=4.3855]
Validation Epoch 37: 100%|██████████| 203/203 [00:01<00:00, 182.53it/s, loss=4.3296]



Epoch 37:
Average Train Loss: 3.8111
Average Val Loss: 3.7989


Training Epoch 38: 100%|██████████| 811/811 [00:10<00:00, 79.22it/s, loss=3.7916]
Validation Epoch 38: 100%|██████████| 203/203 [00:01<00:00, 184.06it/s, loss=4.2277]



Epoch 38:
Average Train Loss: 3.7965
Average Val Loss: 3.8665


Training Epoch 39: 100%|██████████| 811/811 [00:13<00:00, 59.12it/s, loss=4.7544]
Validation Epoch 39: 100%|██████████| 203/203 [00:01<00:00, 182.38it/s, loss=3.8190]



Epoch 39:
Average Train Loss: 3.8222
Average Val Loss: 3.8779


Training Epoch 40: 100%|██████████| 811/811 [00:13<00:00, 60.28it/s, loss=3.3100]
Validation Epoch 40: 100%|██████████| 203/203 [00:01<00:00, 183.70it/s, loss=4.9840]



Epoch 40:
Average Train Loss: 3.8229
Average Val Loss: 3.8417


Training Epoch 41: 100%|██████████| 811/811 [00:13<00:00, 59.76it/s, loss=4.0468]
Validation Epoch 41: 100%|██████████| 203/203 [00:01<00:00, 183.31it/s, loss=3.5079]



Epoch 41:
Average Train Loss: 3.8439
Average Val Loss: 3.8012


Training Epoch 42: 100%|██████████| 811/811 [00:13<00:00, 59.50it/s, loss=3.6734]
Validation Epoch 42: 100%|██████████| 203/203 [00:01<00:00, 183.46it/s, loss=4.3380]



Epoch 42:
Average Train Loss: 3.8345
Average Val Loss: 3.8293


Training Epoch 43: 100%|██████████| 811/811 [00:13<00:00, 59.57it/s, loss=4.7395]
Validation Epoch 43: 100%|██████████| 203/203 [00:01<00:00, 183.10it/s, loss=4.5943]



Epoch 43:
Average Train Loss: 3.8059
Average Val Loss: 3.8315


Training Epoch 44: 100%|██████████| 811/811 [00:13<00:00, 59.64it/s, loss=4.1542]
Validation Epoch 44: 100%|██████████| 203/203 [00:01<00:00, 184.07it/s, loss=3.6555]



Epoch 44:
Average Train Loss: 3.8409
Average Val Loss: 3.8780


Training Epoch 45: 100%|██████████| 811/811 [00:13<00:00, 59.69it/s, loss=4.5796]
Validation Epoch 45: 100%|██████████| 203/203 [00:01<00:00, 184.79it/s, loss=3.2710]



Epoch 45:
Average Train Loss: 3.8257
Average Val Loss: 3.7621


Training Epoch 46: 100%|██████████| 811/811 [00:13<00:00, 59.76it/s, loss=4.6970]
Validation Epoch 46: 100%|██████████| 203/203 [00:01<00:00, 184.74it/s, loss=3.0766]



Epoch 46:
Average Train Loss: 3.8189
Average Val Loss: 3.7975


Training Epoch 47: 100%|██████████| 811/811 [00:12<00:00, 66.67it/s, loss=3.5238]
Validation Epoch 47: 100%|██████████| 203/203 [00:01<00:00, 183.73it/s, loss=1.5809]



Epoch 47:
Average Train Loss: 3.8148
Average Val Loss: 3.8022


Training Epoch 48: 100%|██████████| 811/811 [00:10<00:00, 79.92it/s, loss=3.8476]
Validation Epoch 48: 100%|██████████| 203/203 [00:01<00:00, 181.49it/s, loss=3.5666]



Epoch 48:
Average Train Loss: 3.8312
Average Val Loss: 3.7326


Training Epoch 49: 100%|██████████| 811/811 [00:10<00:00, 78.97it/s, loss=4.3779]
Validation Epoch 49: 100%|██████████| 203/203 [00:01<00:00, 184.12it/s, loss=2.3097]



Epoch 49:
Average Train Loss: 3.8267
Average Val Loss: 3.8094


Training Epoch 50: 100%|██████████| 811/811 [00:11<00:00, 73.14it/s, loss=4.4810]
Validation Epoch 50: 100%|██████████| 203/203 [00:01<00:00, 175.59it/s, loss=2.9420]



Epoch 50:
Average Train Loss: 3.8160
Average Val Loss: 3.8159


Training Epoch 51: 100%|██████████| 811/811 [00:10<00:00, 74.56it/s, loss=4.3479]
Validation Epoch 51: 100%|██████████| 203/203 [00:01<00:00, 178.24it/s, loss=4.1459]



Epoch 51:
Average Train Loss: 3.8178
Average Val Loss: 3.8025


Training Epoch 52: 100%|██████████| 811/811 [00:11<00:00, 72.56it/s, loss=4.6027]
Validation Epoch 52: 100%|██████████| 203/203 [00:01<00:00, 180.25it/s, loss=4.1712]



Epoch 52:
Average Train Loss: 3.8190
Average Val Loss: 3.8334


Training Epoch 53: 100%|██████████| 811/811 [00:09<00:00, 83.40it/s, loss=4.0094]
Validation Epoch 53: 100%|██████████| 203/203 [00:01<00:00, 184.16it/s, loss=3.3631]



Epoch 53:
Average Train Loss: 3.8152
Average Val Loss: 3.8364


Training Epoch 54: 100%|██████████| 811/811 [00:09<00:00, 86.79it/s, loss=3.2085]
Validation Epoch 54: 100%|██████████| 203/203 [00:01<00:00, 184.58it/s, loss=4.6210]



Epoch 54:
Average Train Loss: 3.8184
Average Val Loss: 3.8635


Training Epoch 55: 100%|██████████| 811/811 [00:09<00:00, 86.65it/s, loss=3.9353]
Validation Epoch 55: 100%|██████████| 203/203 [00:01<00:00, 183.70it/s, loss=4.0045]



Epoch 55:
Average Train Loss: 3.8392
Average Val Loss: 3.8204


Training Epoch 56: 100%|██████████| 811/811 [00:09<00:00, 86.49it/s, loss=3.8663]
Validation Epoch 56: 100%|██████████| 203/203 [00:01<00:00, 184.19it/s, loss=4.0725]



Epoch 56:
Average Train Loss: 3.8094
Average Val Loss: 3.8344


Training Epoch 57: 100%|██████████| 811/811 [00:09<00:00, 86.64it/s, loss=3.3196]
Validation Epoch 57: 100%|██████████| 203/203 [00:01<00:00, 184.20it/s, loss=3.7066]



Epoch 57:
Average Train Loss: 3.8591
Average Val Loss: 3.8027


Training Epoch 58: 100%|██████████| 811/811 [00:09<00:00, 81.55it/s, loss=4.0729]
Validation Epoch 58: 100%|██████████| 203/203 [00:01<00:00, 185.47it/s, loss=4.1107]



Epoch 58:
Average Train Loss: 3.8138
Average Val Loss: 3.7852


Training Epoch 59: 100%|██████████| 811/811 [00:12<00:00, 65.88it/s, loss=4.5224]
Validation Epoch 59: 100%|██████████| 203/203 [00:01<00:00, 184.04it/s, loss=3.3537]



Epoch 59:
Average Train Loss: 3.8307
Average Val Loss: 3.7288


Training Epoch 60: 100%|██████████| 811/811 [00:10<00:00, 74.56it/s, loss=3.9715]
Validation Epoch 60: 100%|██████████| 203/203 [00:01<00:00, 184.03it/s, loss=4.7760]



Epoch 60:
Average Train Loss: 3.7936
Average Val Loss: 3.8475


Training Epoch 61: 100%|██████████| 811/811 [00:12<00:00, 64.09it/s, loss=2.2104]
Validation Epoch 61: 100%|██████████| 203/203 [00:01<00:00, 183.28it/s, loss=4.7133]



Epoch 61:
Average Train Loss: 3.8205
Average Val Loss: 3.8533


Training Epoch 62: 100%|██████████| 811/811 [00:10<00:00, 78.36it/s, loss=3.3156]
Validation Epoch 62: 100%|██████████| 203/203 [00:01<00:00, 174.60it/s, loss=2.1663]



Epoch 62:
Average Train Loss: 3.7852
Average Val Loss: 3.8877


Training Epoch 63: 100%|██████████| 811/811 [00:10<00:00, 79.35it/s, loss=4.2275]
Validation Epoch 63: 100%|██████████| 203/203 [00:01<00:00, 170.42it/s, loss=4.6750]



Epoch 63:
Average Train Loss: 3.8040
Average Val Loss: 3.8427


Training Epoch 64: 100%|██████████| 811/811 [00:09<00:00, 82.13it/s, loss=4.6314]
Validation Epoch 64: 100%|██████████| 203/203 [00:01<00:00, 183.57it/s, loss=4.2544]



Epoch 64:
Average Train Loss: 3.8152
Average Val Loss: 3.8080


Training Epoch 65: 100%|██████████| 811/811 [00:09<00:00, 84.19it/s, loss=4.6194]
Validation Epoch 65: 100%|██████████| 203/203 [00:01<00:00, 183.91it/s, loss=2.7091]



Epoch 65:
Average Train Loss: 3.7998
Average Val Loss: 3.8127


Training Epoch 66: 100%|██████████| 811/811 [00:09<00:00, 83.89it/s, loss=4.1717]
Validation Epoch 66: 100%|██████████| 203/203 [00:01<00:00, 184.30it/s, loss=3.0995]



Epoch 66:
Average Train Loss: 3.8316
Average Val Loss: 3.7847


Training Epoch 67: 100%|██████████| 811/811 [00:11<00:00, 73.06it/s, loss=2.1479]
Validation Epoch 67: 100%|██████████| 203/203 [00:01<00:00, 185.66it/s, loss=3.9360]



Epoch 67:
Average Train Loss: 3.8078
Average Val Loss: 3.8275


Training Epoch 68: 100%|██████████| 811/811 [00:10<00:00, 80.48it/s, loss=4.5022]
Validation Epoch 68: 100%|██████████| 203/203 [00:01<00:00, 188.70it/s, loss=3.7494]



Epoch 68:
Average Train Loss: 3.7901
Average Val Loss: 3.7782


Training Epoch 69: 100%|██████████| 811/811 [00:10<00:00, 80.57it/s, loss=3.8221]
Validation Epoch 69: 100%|██████████| 203/203 [00:01<00:00, 186.00it/s, loss=4.4994]



Epoch 69:
Average Train Loss: 3.8358
Average Val Loss: 3.8344


Training Epoch 70: 100%|██████████| 811/811 [00:10<00:00, 79.75it/s, loss=3.8810]
Validation Epoch 70: 100%|██████████| 203/203 [00:01<00:00, 183.67it/s, loss=4.6826]



Epoch 70:
Average Train Loss: 3.8055
Average Val Loss: 3.7633


Training Epoch 71: 100%|██████████| 811/811 [00:12<00:00, 62.47it/s, loss=4.4901]
Validation Epoch 71: 100%|██████████| 203/203 [00:01<00:00, 186.08it/s, loss=3.8657]



Epoch 71:
Average Train Loss: 3.7920
Average Val Loss: 3.7842


Training Epoch 72: 100%|██████████| 811/811 [00:11<00:00, 71.96it/s, loss=4.0670]
Validation Epoch 72: 100%|██████████| 203/203 [00:01<00:00, 184.65it/s, loss=3.8551]



Epoch 72:
Average Train Loss: 3.8054
Average Val Loss: 3.8128


Training Epoch 73: 100%|██████████| 811/811 [00:13<00:00, 60.18it/s, loss=2.8683]
Validation Epoch 73: 100%|██████████| 203/203 [00:01<00:00, 184.06it/s, loss=2.9559]



Epoch 73:
Average Train Loss: 3.7791
Average Val Loss: 3.7830


Training Epoch 74: 100%|██████████| 811/811 [00:13<00:00, 60.01it/s, loss=3.9125]
Validation Epoch 74: 100%|██████████| 203/203 [00:01<00:00, 184.54it/s, loss=4.2090]



Epoch 74:
Average Train Loss: 3.8186
Average Val Loss: 3.8426


Training Epoch 75: 100%|██████████| 811/811 [00:13<00:00, 59.68it/s, loss=4.2179]
Validation Epoch 75: 100%|██████████| 203/203 [00:01<00:00, 184.11it/s, loss=4.4520]



Epoch 75:
Average Train Loss: 3.8248
Average Val Loss: 3.7920


Training Epoch 76: 100%|██████████| 811/811 [00:13<00:00, 59.43it/s, loss=4.1500]
Validation Epoch 76: 100%|██████████| 203/203 [00:01<00:00, 184.25it/s, loss=3.9955]



Epoch 76:
Average Train Loss: 3.8389
Average Val Loss: 3.7449


Training Epoch 77: 100%|██████████| 811/811 [00:13<00:00, 59.07it/s, loss=3.9936]
Validation Epoch 77: 100%|██████████| 203/203 [00:01<00:00, 183.61it/s, loss=3.9515]



Epoch 77:
Average Train Loss: 3.7829
Average Val Loss: 3.8943


Training Epoch 78: 100%|██████████| 811/811 [00:13<00:00, 59.70it/s, loss=3.3538]
Validation Epoch 78: 100%|██████████| 203/203 [00:01<00:00, 146.99it/s, loss=4.5166]



Epoch 78:
Average Train Loss: 3.8284
Average Val Loss: 3.9069


Training Epoch 79: 100%|██████████| 811/811 [00:09<00:00, 83.03it/s, loss=4.3421]
Validation Epoch 79: 100%|██████████| 203/203 [00:01<00:00, 183.41it/s, loss=3.7032]



Epoch 79:
Average Train Loss: 3.8381
Average Val Loss: 3.7188


Training Epoch 80: 100%|██████████| 811/811 [00:10<00:00, 77.11it/s, loss=3.4598]
Validation Epoch 80: 100%|██████████| 203/203 [00:01<00:00, 183.12it/s, loss=4.8076]



Epoch 80:
Average Train Loss: 3.8209
Average Val Loss: 3.8548


Training Epoch 81: 100%|██████████| 811/811 [00:10<00:00, 75.15it/s, loss=4.1671]
Validation Epoch 81: 100%|██████████| 203/203 [00:01<00:00, 178.24it/s, loss=4.0078]



Epoch 81:
Average Train Loss: 3.7951
Average Val Loss: 3.8080


Training Epoch 82: 100%|██████████| 811/811 [00:10<00:00, 76.27it/s, loss=2.8541]
Validation Epoch 82: 100%|██████████| 203/203 [00:01<00:00, 180.42it/s, loss=4.1202]



Epoch 82:
Average Train Loss: 3.8118
Average Val Loss: 3.7458


Training Epoch 83: 100%|██████████| 811/811 [00:11<00:00, 70.43it/s, loss=3.8416]
Validation Epoch 83: 100%|██████████| 203/203 [00:01<00:00, 183.86it/s, loss=3.5838]



Epoch 83:
Average Train Loss: 3.8147
Average Val Loss: 3.8060


Training Epoch 84: 100%|██████████| 811/811 [00:12<00:00, 62.40it/s, loss=3.7147]
Validation Epoch 84: 100%|██████████| 203/203 [00:01<00:00, 183.89it/s, loss=4.3890]



Epoch 84:
Average Train Loss: 3.8460
Average Val Loss: 3.7543


Training Epoch 85: 100%|██████████| 811/811 [00:10<00:00, 79.43it/s, loss=3.9785]
Validation Epoch 85: 100%|██████████| 203/203 [00:01<00:00, 185.20it/s, loss=3.3874]



Epoch 85:
Average Train Loss: 3.8403
Average Val Loss: 3.8760


Training Epoch 86: 100%|██████████| 811/811 [00:10<00:00, 77.46it/s, loss=3.3758]
Validation Epoch 86: 100%|██████████| 203/203 [00:01<00:00, 184.54it/s, loss=3.9986]



Epoch 86:
Average Train Loss: 3.7916
Average Val Loss: 3.8043


Training Epoch 87: 100%|██████████| 811/811 [00:12<00:00, 66.59it/s, loss=4.4933]
Validation Epoch 87: 100%|██████████| 203/203 [00:01<00:00, 183.31it/s, loss=3.9991]



Epoch 87:
Average Train Loss: 3.8224
Average Val Loss: 3.8660


Training Epoch 88: 100%|██████████| 811/811 [00:10<00:00, 73.94it/s, loss=4.5177]
Validation Epoch 88: 100%|██████████| 203/203 [00:01<00:00, 184.30it/s, loss=2.6245]



Epoch 88:
Average Train Loss: 3.8116
Average Val Loss: 3.7936


Training Epoch 89: 100%|██████████| 811/811 [00:10<00:00, 78.58it/s, loss=3.1767]
Validation Epoch 89: 100%|██████████| 203/203 [00:01<00:00, 184.48it/s, loss=1.1378]



Epoch 89:
Average Train Loss: 3.8261
Average Val Loss: 3.8444


Training Epoch 90: 100%|██████████| 811/811 [00:10<00:00, 79.29it/s, loss=3.8515]
Validation Epoch 90: 100%|██████████| 203/203 [00:01<00:00, 185.37it/s, loss=4.6555]



Epoch 90:
Average Train Loss: 3.8222
Average Val Loss: 3.8310


Training Epoch 91: 100%|██████████| 811/811 [00:10<00:00, 79.21it/s, loss=4.3877]
Validation Epoch 91: 100%|██████████| 203/203 [00:01<00:00, 185.15it/s, loss=4.5745]



Epoch 91:
Average Train Loss: 3.8369
Average Val Loss: 3.8244


Training Epoch 92: 100%|██████████| 811/811 [00:10<00:00, 79.25it/s, loss=4.1510]
Validation Epoch 92: 100%|██████████| 203/203 [00:01<00:00, 185.43it/s, loss=3.6982]



Epoch 92:
Average Train Loss: 3.8162
Average Val Loss: 3.7685


Training Epoch 93: 100%|██████████| 811/811 [00:10<00:00, 79.25it/s, loss=3.6431]
Validation Epoch 93: 100%|██████████| 203/203 [00:01<00:00, 184.44it/s, loss=3.9784]



Epoch 93:
Average Train Loss: 3.8046
Average Val Loss: 3.7906


Training Epoch 94: 100%|██████████| 811/811 [00:10<00:00, 79.41it/s, loss=4.4101]
Validation Epoch 94: 100%|██████████| 203/203 [00:01<00:00, 185.37it/s, loss=2.6100]



Epoch 94:
Average Train Loss: 3.8238
Average Val Loss: 3.7407


Training Epoch 95: 100%|██████████| 811/811 [00:11<00:00, 71.11it/s, loss=4.3363]
Validation Epoch 95: 100%|██████████| 203/203 [00:01<00:00, 186.19it/s, loss=3.9497]



Epoch 95:
Average Train Loss: 3.8106
Average Val Loss: 3.8096


Training Epoch 96: 100%|██████████| 811/811 [00:12<00:00, 66.07it/s, loss=2.9767]
Validation Epoch 96: 100%|██████████| 203/203 [00:01<00:00, 186.38it/s, loss=4.2336]



Epoch 96:
Average Train Loss: 3.8018
Average Val Loss: 3.8548


Training Epoch 97: 100%|██████████| 811/811 [00:09<00:00, 88.75it/s, loss=4.6327]
Validation Epoch 97: 100%|██████████| 203/203 [00:01<00:00, 186.15it/s, loss=3.6756]



Epoch 97:
Average Train Loss: 3.8303
Average Val Loss: 3.8128


Training Epoch 98: 100%|██████████| 811/811 [00:09<00:00, 85.43it/s, loss=3.0321]
Validation Epoch 98: 100%|██████████| 203/203 [00:01<00:00, 185.95it/s, loss=4.4025]



Epoch 98:
Average Train Loss: 3.8279
Average Val Loss: 3.7892


Training Epoch 99: 100%|██████████| 811/811 [00:09<00:00, 85.56it/s, loss=4.7989]
Validation Epoch 99: 100%|██████████| 203/203 [00:01<00:00, 186.70it/s, loss=3.6232]



Epoch 99:
Average Train Loss: 3.8323
Average Val Loss: 3.7761


Training Epoch 100: 100%|██████████| 811/811 [00:10<00:00, 80.01it/s, loss=3.0753]
Validation Epoch 100: 100%|██████████| 203/203 [00:01<00:00, 187.43it/s, loss=2.7837]



Epoch 100:
Average Train Loss: 3.8397
Average Val Loss: 3.8329


Training Epoch 101: 100%|██████████| 811/811 [00:10<00:00, 80.85it/s, loss=4.1900]
Validation Epoch 101: 100%|██████████| 203/203 [00:01<00:00, 187.60it/s, loss=3.5186]



Epoch 101:
Average Train Loss: 3.7920
Average Val Loss: 3.8472


Training Epoch 102: 100%|██████████| 811/811 [00:10<00:00, 77.41it/s, loss=4.3436]
Validation Epoch 102: 100%|██████████| 203/203 [00:01<00:00, 186.40it/s, loss=3.1359]



Epoch 102:
Average Train Loss: 3.7944
Average Val Loss: 3.8097


Training Epoch 103: 100%|██████████| 811/811 [00:10<00:00, 77.51it/s, loss=3.8558]
Validation Epoch 103: 100%|██████████| 203/203 [00:01<00:00, 162.92it/s, loss=4.3330]



Epoch 103:
Average Train Loss: 3.8276
Average Val Loss: 3.7183


Training Epoch 104: 100%|██████████| 811/811 [00:10<00:00, 76.91it/s, loss=2.9934]
Validation Epoch 104: 100%|██████████| 203/203 [00:01<00:00, 170.24it/s, loss=4.4924]



Epoch 104:
Average Train Loss: 3.8400
Average Val Loss: 3.8774


Training Epoch 105: 100%|██████████| 811/811 [00:10<00:00, 78.24it/s, loss=4.3723]
Validation Epoch 105: 100%|██████████| 203/203 [00:01<00:00, 183.88it/s, loss=2.6393]



Epoch 105:
Average Train Loss: 3.7792
Average Val Loss: 3.8089


Training Epoch 106: 100%|██████████| 811/811 [00:11<00:00, 69.22it/s, loss=4.0141]
Validation Epoch 106: 100%|██████████| 203/203 [00:01<00:00, 179.29it/s, loss=4.3852]



Epoch 106:
Average Train Loss: 3.7959
Average Val Loss: 3.8025


Training Epoch 107: 100%|██████████| 811/811 [00:10<00:00, 77.28it/s, loss=4.5345]
Validation Epoch 107: 100%|██████████| 203/203 [00:01<00:00, 184.79it/s, loss=3.9163]



Epoch 107:
Average Train Loss: 3.8066
Average Val Loss: 3.7874


Training Epoch 108: 100%|██████████| 811/811 [00:10<00:00, 80.45it/s, loss=4.7936]
Validation Epoch 108: 100%|██████████| 203/203 [00:01<00:00, 174.93it/s, loss=4.5158]



Epoch 108:
Average Train Loss: 3.7974
Average Val Loss: 3.8075


Training Epoch 109: 100%|██████████| 811/811 [00:10<00:00, 75.76it/s, loss=4.2079]
Validation Epoch 109: 100%|██████████| 203/203 [00:01<00:00, 184.08it/s, loss=4.0224]



Epoch 109:
Average Train Loss: 3.8232
Average Val Loss: 3.7789


Training Epoch 110: 100%|██████████| 811/811 [00:09<00:00, 87.79it/s, loss=2.5335]
Validation Epoch 110: 100%|██████████| 203/203 [00:01<00:00, 184.00it/s, loss=3.4868]



Epoch 110:
Average Train Loss: 3.8040
Average Val Loss: 3.7778


Training Epoch 111: 100%|██████████| 811/811 [00:09<00:00, 84.46it/s, loss=4.4742]
Validation Epoch 111: 100%|██████████| 203/203 [00:01<00:00, 184.78it/s, loss=3.8033]



Epoch 111:
Average Train Loss: 3.7811
Average Val Loss: 3.8130


Training Epoch 112: 100%|██████████| 811/811 [00:09<00:00, 85.12it/s, loss=3.9265]
Validation Epoch 112: 100%|██████████| 203/203 [00:01<00:00, 183.89it/s, loss=3.2950]



Epoch 112:
Average Train Loss: 3.8182
Average Val Loss: 3.7910


Training Epoch 113: 100%|██████████| 811/811 [00:10<00:00, 77.17it/s, loss=3.6088]
Validation Epoch 113: 100%|██████████| 203/203 [00:01<00:00, 185.02it/s, loss=4.1014]



Epoch 113:
Average Train Loss: 3.7940
Average Val Loss: 3.7876


Training Epoch 114: 100%|██████████| 811/811 [00:09<00:00, 82.94it/s, loss=4.2888]
Validation Epoch 114: 100%|██████████| 203/203 [00:01<00:00, 185.39it/s, loss=3.1487]



Epoch 114:
Average Train Loss: 3.8204
Average Val Loss: 3.7777


Training Epoch 115: 100%|██████████| 811/811 [00:10<00:00, 78.73it/s, loss=4.5726]
Validation Epoch 115: 100%|██████████| 203/203 [00:01<00:00, 179.46it/s, loss=2.6948]



Epoch 115:
Average Train Loss: 3.7788
Average Val Loss: 3.7536


Training Epoch 116: 100%|██████████| 811/811 [00:10<00:00, 77.86it/s, loss=4.2071]
Validation Epoch 116: 100%|██████████| 203/203 [00:01<00:00, 166.87it/s, loss=4.4738]



Epoch 116:
Average Train Loss: 3.8270
Average Val Loss: 3.7924


Training Epoch 117: 100%|██████████| 811/811 [00:10<00:00, 75.23it/s, loss=4.5520]
Validation Epoch 117: 100%|██████████| 203/203 [00:01<00:00, 173.09it/s, loss=3.9070]



Epoch 117:
Average Train Loss: 3.7660
Average Val Loss: 3.7742


Training Epoch 118: 100%|██████████| 811/811 [00:10<00:00, 75.66it/s, loss=2.8284]
Validation Epoch 118: 100%|██████████| 203/203 [00:01<00:00, 179.19it/s, loss=4.7273]



Epoch 118:
Average Train Loss: 3.7784
Average Val Loss: 3.8376


Training Epoch 119: 100%|██████████| 811/811 [00:10<00:00, 77.84it/s, loss=4.6207]
Validation Epoch 119: 100%|██████████| 203/203 [00:01<00:00, 183.63it/s, loss=3.8553]



Epoch 119:
Average Train Loss: 3.7892
Average Val Loss: 3.7145


Training Epoch 120: 100%|██████████| 811/811 [00:10<00:00, 74.87it/s, loss=2.7299]
Validation Epoch 120: 100%|██████████| 203/203 [00:01<00:00, 181.57it/s, loss=3.8499]



Epoch 120:
Average Train Loss: 3.8002
Average Val Loss: 3.7811


Training Epoch 121: 100%|██████████| 811/811 [00:10<00:00, 78.52it/s, loss=4.6206]
Validation Epoch 121: 100%|██████████| 203/203 [00:01<00:00, 177.52it/s, loss=4.2502]



Epoch 121:
Average Train Loss: 3.7900
Average Val Loss: 3.8143


Training Epoch 122: 100%|██████████| 811/811 [00:10<00:00, 76.96it/s, loss=4.0669]
Validation Epoch 122: 100%|██████████| 203/203 [00:01<00:00, 185.09it/s, loss=3.2501]



Epoch 122:
Average Train Loss: 3.7673
Average Val Loss: 3.8048


Training Epoch 123: 100%|██████████| 811/811 [00:10<00:00, 75.79it/s, loss=4.2069]
Validation Epoch 123: 100%|██████████| 203/203 [00:01<00:00, 170.26it/s, loss=3.7686]



Epoch 123:
Average Train Loss: 3.8249
Average Val Loss: 3.7912


Training Epoch 124: 100%|██████████| 811/811 [00:10<00:00, 75.75it/s, loss=4.2824]
Validation Epoch 124: 100%|██████████| 203/203 [00:01<00:00, 178.29it/s, loss=4.6770]



Epoch 124:
Average Train Loss: 3.8076
Average Val Loss: 3.8135


Training Epoch 125: 100%|██████████| 811/811 [00:10<00:00, 77.39it/s, loss=3.4267]
Validation Epoch 125: 100%|██████████| 203/203 [00:01<00:00, 180.33it/s, loss=3.8028]



Epoch 125:
Average Train Loss: 3.7848
Average Val Loss: 3.8207


Training Epoch 126: 100%|██████████| 811/811 [00:10<00:00, 76.04it/s, loss=4.3856]
Validation Epoch 126: 100%|██████████| 203/203 [00:01<00:00, 180.14it/s, loss=4.1220]



Epoch 126:
Average Train Loss: 3.7977
Average Val Loss: 3.7580


Training Epoch 127: 100%|██████████| 811/811 [00:10<00:00, 75.33it/s, loss=4.4496]
Validation Epoch 127: 100%|██████████| 203/203 [00:01<00:00, 181.51it/s, loss=2.9040]



Epoch 127:
Average Train Loss: 3.8103
Average Val Loss: 3.8504


Training Epoch 128: 100%|██████████| 811/811 [00:10<00:00, 78.60it/s, loss=3.0079]
Validation Epoch 128: 100%|██████████| 203/203 [00:01<00:00, 175.26it/s, loss=4.4385]



Epoch 128:
Average Train Loss: 3.8033
Average Val Loss: 3.7517


Training Epoch 129: 100%|██████████| 811/811 [00:10<00:00, 77.45it/s, loss=4.6004]
Validation Epoch 129: 100%|██████████| 203/203 [00:01<00:00, 184.31it/s, loss=4.3449]



Epoch 129:
Average Train Loss: 3.8058
Average Val Loss: 3.8573


Training Epoch 130: 100%|██████████| 811/811 [00:10<00:00, 79.19it/s, loss=2.6986]
Validation Epoch 130: 100%|██████████| 203/203 [00:01<00:00, 183.44it/s, loss=3.8173]



Epoch 130:
Average Train Loss: 3.8296
Average Val Loss: 3.7059


Training Epoch 131: 100%|██████████| 811/811 [00:11<00:00, 73.53it/s, loss=4.1298]
Validation Epoch 131: 100%|██████████| 203/203 [00:01<00:00, 170.66it/s, loss=2.8691]



Epoch 131:
Average Train Loss: 3.7959
Average Val Loss: 3.7724


Training Epoch 132: 100%|██████████| 811/811 [00:10<00:00, 76.62it/s, loss=3.7094]
Validation Epoch 132: 100%|██████████| 203/203 [00:01<00:00, 184.39it/s, loss=3.6788]



Epoch 132:
Average Train Loss: 3.8160
Average Val Loss: 3.7617


Training Epoch 133: 100%|██████████| 811/811 [00:09<00:00, 81.52it/s, loss=3.5213]
Validation Epoch 133: 100%|██████████| 203/203 [00:01<00:00, 179.34it/s, loss=3.1949]



Epoch 133:
Average Train Loss: 3.7813
Average Val Loss: 3.7682


Training Epoch 134: 100%|██████████| 811/811 [00:10<00:00, 75.41it/s, loss=4.5814]
Validation Epoch 134: 100%|██████████| 203/203 [00:01<00:00, 182.89it/s, loss=3.0554]



Epoch 134:
Average Train Loss: 3.8240
Average Val Loss: 3.8037


Training Epoch 135: 100%|██████████| 811/811 [00:10<00:00, 75.31it/s, loss=4.0065]
Validation Epoch 135: 100%|██████████| 203/203 [00:01<00:00, 144.24it/s, loss=3.6790]



Epoch 135:
Average Train Loss: 3.7607
Average Val Loss: 3.8299


Training Epoch 136: 100%|██████████| 811/811 [00:09<00:00, 87.17it/s, loss=4.0294]
Validation Epoch 136: 100%|██████████| 203/203 [00:01<00:00, 183.21it/s, loss=3.0217]



Epoch 136:
Average Train Loss: 3.8238
Average Val Loss: 3.8313


Training Epoch 137: 100%|██████████| 811/811 [00:09<00:00, 88.22it/s, loss=4.6623]
Validation Epoch 137: 100%|██████████| 203/203 [00:01<00:00, 183.05it/s, loss=3.9968]



Epoch 137:
Average Train Loss: 3.8314
Average Val Loss: 3.7643


Training Epoch 138: 100%|██████████| 811/811 [00:09<00:00, 85.99it/s, loss=3.5142]
Validation Epoch 138: 100%|██████████| 203/203 [00:01<00:00, 170.34it/s, loss=3.7573]



Epoch 138:
Average Train Loss: 3.8088
Average Val Loss: 3.8848


Training Epoch 139: 100%|██████████| 811/811 [00:10<00:00, 79.67it/s, loss=4.3693]
Validation Epoch 139: 100%|██████████| 203/203 [00:01<00:00, 185.34it/s, loss=4.8890]



Epoch 139:
Average Train Loss: 3.8099
Average Val Loss: 3.8699


Training Epoch 140: 100%|██████████| 811/811 [00:11<00:00, 71.95it/s, loss=4.5263]
Validation Epoch 140: 100%|██████████| 203/203 [00:01<00:00, 174.90it/s, loss=4.4628]



Epoch 140:
Average Train Loss: 3.8064
Average Val Loss: 3.7307


Training Epoch 141: 100%|██████████| 811/811 [00:10<00:00, 78.64it/s, loss=3.2951]
Validation Epoch 141: 100%|██████████| 203/203 [00:01<00:00, 175.80it/s, loss=4.1376]



Epoch 141:
Average Train Loss: 3.8172
Average Val Loss: 3.8134


Training Epoch 142: 100%|██████████| 811/811 [00:12<00:00, 63.35it/s, loss=4.0084]
Validation Epoch 142: 100%|██████████| 203/203 [00:01<00:00, 182.69it/s, loss=3.3316]



Epoch 142:
Average Train Loss: 3.7988
Average Val Loss: 3.7472


Training Epoch 143: 100%|██████████| 811/811 [00:13<00:00, 59.29it/s, loss=4.5775]
Validation Epoch 143: 100%|██████████| 203/203 [00:01<00:00, 183.06it/s, loss=4.4009]



Epoch 143:
Average Train Loss: 3.8006
Average Val Loss: 3.8714


Training Epoch 144: 100%|██████████| 811/811 [00:13<00:00, 59.10it/s, loss=3.3244]
Validation Epoch 144: 100%|██████████| 203/203 [00:01<00:00, 183.34it/s, loss=4.6805]



Epoch 144:
Average Train Loss: 3.8101
Average Val Loss: 3.8325


Training Epoch 145: 100%|██████████| 811/811 [00:13<00:00, 59.21it/s, loss=4.1227]
Validation Epoch 145: 100%|██████████| 203/203 [00:01<00:00, 183.19it/s, loss=4.2961]



Epoch 145:
Average Train Loss: 3.8010
Average Val Loss: 3.7654


Training Epoch 146: 100%|██████████| 811/811 [00:13<00:00, 59.55it/s, loss=4.1537]
Validation Epoch 146: 100%|██████████| 203/203 [00:01<00:00, 183.70it/s, loss=3.7998]



Epoch 146:
Average Train Loss: 3.7915
Average Val Loss: 3.8329


Training Epoch 147: 100%|██████████| 811/811 [00:13<00:00, 59.31it/s, loss=4.5814]
Validation Epoch 147: 100%|██████████| 203/203 [00:01<00:00, 183.40it/s, loss=2.9278]



Epoch 147:
Average Train Loss: 3.8174
Average Val Loss: 3.8011


Training Epoch 148: 100%|██████████| 811/811 [00:13<00:00, 61.05it/s, loss=3.7134]
Validation Epoch 148: 100%|██████████| 203/203 [00:01<00:00, 183.85it/s, loss=3.4896]



Epoch 148:
Average Train Loss: 3.8197
Average Val Loss: 3.8121


Training Epoch 149: 100%|██████████| 811/811 [00:11<00:00, 67.74it/s, loss=4.8987]
Validation Epoch 149: 100%|██████████| 203/203 [00:01<00:00, 183.74it/s, loss=4.0084]



Epoch 149:
Average Train Loss: 3.7619
Average Val Loss: 3.8279


Training Epoch 150: 100%|██████████| 811/811 [00:12<00:00, 66.16it/s, loss=4.0269]
Validation Epoch 150: 100%|██████████| 203/203 [00:01<00:00, 185.36it/s, loss=3.7022]



Epoch 150:
Average Train Loss: 3.7905
Average Val Loss: 3.7827


Training Epoch 151: 100%|██████████| 811/811 [00:10<00:00, 76.54it/s, loss=3.2854]
Validation Epoch 151: 100%|██████████| 203/203 [00:01<00:00, 184.86it/s, loss=4.3973]



Epoch 151:
Average Train Loss: 3.7827
Average Val Loss: 3.7782


Training Epoch 152: 100%|██████████| 811/811 [00:10<00:00, 77.58it/s, loss=4.7031]
Validation Epoch 152: 100%|██████████| 203/203 [00:01<00:00, 185.30it/s, loss=3.5440]



Epoch 152:
Average Train Loss: 3.7990
Average Val Loss: 3.8189


Training Epoch 153: 100%|██████████| 811/811 [00:12<00:00, 63.15it/s, loss=3.8069]
Validation Epoch 153: 100%|██████████| 203/203 [00:01<00:00, 184.47it/s, loss=2.1993]



Epoch 153:
Average Train Loss: 3.7845
Average Val Loss: 3.7888


Training Epoch 154: 100%|██████████| 811/811 [00:12<00:00, 63.67it/s, loss=4.1728]
Validation Epoch 154: 100%|██████████| 203/203 [00:01<00:00, 184.01it/s, loss=4.2310]



Epoch 154:
Average Train Loss: 3.7854
Average Val Loss: 3.8430


Training Epoch 155: 100%|██████████| 811/811 [00:13<00:00, 59.50it/s, loss=4.3059]
Validation Epoch 155: 100%|██████████| 203/203 [00:01<00:00, 183.64it/s, loss=3.6069]



Epoch 155:
Average Train Loss: 3.8048
Average Val Loss: 3.7968


Training Epoch 156: 100%|██████████| 811/811 [00:13<00:00, 59.57it/s, loss=4.3479]
Validation Epoch 156: 100%|██████████| 203/203 [00:01<00:00, 183.21it/s, loss=4.7801]



Epoch 156:
Average Train Loss: 3.7791
Average Val Loss: 3.8173


Training Epoch 157: 100%|██████████| 811/811 [00:13<00:00, 59.69it/s, loss=4.3173]
Validation Epoch 157: 100%|██████████| 203/203 [00:01<00:00, 183.88it/s, loss=4.2996]



Epoch 157:
Average Train Loss: 3.7956
Average Val Loss: 3.8167


Training Epoch 158: 100%|██████████| 811/811 [00:13<00:00, 61.38it/s, loss=3.6618]
Validation Epoch 158: 100%|██████████| 203/203 [00:01<00:00, 183.47it/s, loss=4.3030]



Epoch 158:
Average Train Loss: 3.8101
Average Val Loss: 3.7672


Training Epoch 159: 100%|██████████| 811/811 [00:11<00:00, 71.32it/s, loss=3.1156]
Validation Epoch 159: 100%|██████████| 203/203 [00:01<00:00, 184.16it/s, loss=4.0918]



Epoch 159:
Average Train Loss: 3.8115
Average Val Loss: 3.7376


Training Epoch 160: 100%|██████████| 811/811 [00:13<00:00, 62.19it/s, loss=2.5219]
Validation Epoch 160: 100%|██████████| 203/203 [00:01<00:00, 184.37it/s, loss=4.1204]



Epoch 160:
Average Train Loss: 3.7868
Average Val Loss: 3.7879


Training Epoch 161: 100%|██████████| 811/811 [00:10<00:00, 79.25it/s, loss=4.5751]
Validation Epoch 161: 100%|██████████| 203/203 [00:01<00:00, 184.32it/s, loss=4.2299]



Epoch 161:
Average Train Loss: 3.7828
Average Val Loss: 3.7716


Training Epoch 162: 100%|██████████| 811/811 [00:10<00:00, 79.37it/s, loss=2.8194]
Validation Epoch 162: 100%|██████████| 203/203 [00:01<00:00, 185.15it/s, loss=4.0718]



Epoch 162:
Average Train Loss: 3.8061
Average Val Loss: 3.8080


Training Epoch 163: 100%|██████████| 811/811 [00:10<00:00, 79.47it/s, loss=3.8020]
Validation Epoch 163: 100%|██████████| 203/203 [00:01<00:00, 184.70it/s, loss=3.7624]



Epoch 163:
Average Train Loss: 3.8023
Average Val Loss: 3.7961


Training Epoch 164: 100%|██████████| 811/811 [00:10<00:00, 79.42it/s, loss=4.5760]
Validation Epoch 164: 100%|██████████| 203/203 [00:01<00:00, 184.30it/s, loss=3.2337]



Epoch 164:
Average Train Loss: 3.7936
Average Val Loss: 3.7570


Training Epoch 165: 100%|██████████| 811/811 [00:10<00:00, 79.54it/s, loss=3.5267]
Validation Epoch 165: 100%|██████████| 203/203 [00:01<00:00, 184.67it/s, loss=4.7165]



Epoch 165:
Average Train Loss: 3.7968
Average Val Loss: 3.7594


Training Epoch 166: 100%|██████████| 811/811 [00:10<00:00, 79.79it/s, loss=4.1847]
Validation Epoch 166: 100%|██████████| 203/203 [00:01<00:00, 185.57it/s, loss=4.7911]



Epoch 166:
Average Train Loss: 3.8278
Average Val Loss: 3.8170


Training Epoch 167: 100%|██████████| 811/811 [00:10<00:00, 79.74it/s, loss=3.6830]
Validation Epoch 167: 100%|██████████| 203/203 [00:01<00:00, 185.45it/s, loss=4.0737]



Epoch 167:
Average Train Loss: 3.7413
Average Val Loss: 3.7569


Training Epoch 168: 100%|██████████| 811/811 [00:10<00:00, 79.72it/s, loss=3.1145]
Validation Epoch 168: 100%|██████████| 203/203 [00:01<00:00, 185.26it/s, loss=4.3498]



Epoch 168:
Average Train Loss: 3.8046
Average Val Loss: 3.7964


Training Epoch 169: 100%|██████████| 811/811 [00:10<00:00, 79.64it/s, loss=4.8081]
Validation Epoch 169: 100%|██████████| 203/203 [00:01<00:00, 184.86it/s, loss=4.7782]



Epoch 169:
Average Train Loss: 3.8156
Average Val Loss: 3.7715


Training Epoch 170: 100%|██████████| 811/811 [00:10<00:00, 79.72it/s, loss=2.9865]
Validation Epoch 170: 100%|██████████| 203/203 [00:01<00:00, 185.44it/s, loss=2.4942]



Epoch 170:
Average Train Loss: 3.7925
Average Val Loss: 3.7119


Training Epoch 171: 100%|██████████| 811/811 [00:10<00:00, 79.53it/s, loss=3.1022]
Validation Epoch 171: 100%|██████████| 203/203 [00:01<00:00, 180.94it/s, loss=2.9235]



Epoch 171:
Average Train Loss: 3.7698
Average Val Loss: 3.8213


Training Epoch 172: 100%|██████████| 811/811 [00:10<00:00, 79.50it/s, loss=4.0688]
Validation Epoch 172: 100%|██████████| 203/203 [00:01<00:00, 178.77it/s, loss=3.7596]



Epoch 172:
Average Train Loss: 3.7982
Average Val Loss: 3.8087


Training Epoch 173: 100%|██████████| 811/811 [00:10<00:00, 79.61it/s, loss=4.4421]
Validation Epoch 173: 100%|██████████| 203/203 [00:01<00:00, 176.03it/s, loss=4.5150]



Epoch 173:
Average Train Loss: 3.8548
Average Val Loss: 3.7896


Training Epoch 174: 100%|██████████| 811/811 [00:10<00:00, 79.34it/s, loss=4.0653]
Validation Epoch 174: 100%|██████████| 203/203 [00:01<00:00, 185.24it/s, loss=3.6637]



Epoch 174:
Average Train Loss: 3.8021
Average Val Loss: 3.7990


Training Epoch 175: 100%|██████████| 811/811 [00:10<00:00, 79.74it/s, loss=3.2619]
Validation Epoch 175: 100%|██████████| 203/203 [00:01<00:00, 183.22it/s, loss=3.2838]



Epoch 175:
Average Train Loss: 3.7777
Average Val Loss: 3.8547


Training Epoch 176: 100%|██████████| 811/811 [00:10<00:00, 79.41it/s, loss=3.7435]
Validation Epoch 176: 100%|██████████| 203/203 [00:01<00:00, 185.55it/s, loss=3.5548]



Epoch 176:
Average Train Loss: 3.7894
Average Val Loss: 3.8238


Training Epoch 177: 100%|██████████| 811/811 [00:10<00:00, 79.41it/s, loss=3.1990]
Validation Epoch 177: 100%|██████████| 203/203 [00:01<00:00, 185.40it/s, loss=1.7140]



Epoch 177:
Average Train Loss: 3.7924
Average Val Loss: 3.7931


Training Epoch 178: 100%|██████████| 811/811 [00:10<00:00, 78.80it/s, loss=4.0312]
Validation Epoch 178: 100%|██████████| 203/203 [00:01<00:00, 177.39it/s, loss=3.9131]



Epoch 178:
Average Train Loss: 3.7937
Average Val Loss: 3.7350


Training Epoch 179: 100%|██████████| 811/811 [00:10<00:00, 77.15it/s, loss=3.8467]
Validation Epoch 179: 100%|██████████| 203/203 [00:01<00:00, 185.40it/s, loss=2.9972]



Epoch 179:
Average Train Loss: 3.7754
Average Val Loss: 3.7315


Training Epoch 180: 100%|██████████| 811/811 [00:10<00:00, 79.15it/s, loss=3.5801]
Validation Epoch 180: 100%|██████████| 203/203 [00:01<00:00, 185.31it/s, loss=2.1515]



Epoch 180:
Average Train Loss: 3.7874
Average Val Loss: 3.7671


Training Epoch 181: 100%|██████████| 811/811 [00:10<00:00, 78.37it/s, loss=4.7894]
Validation Epoch 181: 100%|██████████| 203/203 [00:01<00:00, 185.04it/s, loss=1.5886]



Epoch 181:
Average Train Loss: 3.7908
Average Val Loss: 3.7941


Training Epoch 182: 100%|██████████| 811/811 [00:11<00:00, 70.31it/s, loss=4.2762]
Validation Epoch 182: 100%|██████████| 203/203 [00:01<00:00, 184.42it/s, loss=4.5703]



Epoch 182:
Average Train Loss: 3.7688
Average Val Loss: 3.7723


Training Epoch 183: 100%|██████████| 811/811 [00:13<00:00, 60.71it/s, loss=4.5488]
Validation Epoch 183: 100%|██████████| 203/203 [00:01<00:00, 170.51it/s, loss=4.0942]



Epoch 183:
Average Train Loss: 3.7720
Average Val Loss: 3.7538


Training Epoch 184: 100%|██████████| 811/811 [00:10<00:00, 78.92it/s, loss=4.3912]
Validation Epoch 184: 100%|██████████| 203/203 [00:01<00:00, 171.10it/s, loss=3.6683]



Epoch 184:
Average Train Loss: 3.7999
Average Val Loss: 3.8433


Training Epoch 185: 100%|██████████| 811/811 [00:10<00:00, 75.01it/s, loss=4.0552]
Validation Epoch 185: 100%|██████████| 203/203 [00:01<00:00, 170.57it/s, loss=4.0686]



Epoch 185:
Average Train Loss: 3.7802
Average Val Loss: 3.7775


Training Epoch 186: 100%|██████████| 811/811 [00:13<00:00, 60.71it/s, loss=4.2463]
Validation Epoch 186: 100%|██████████| 203/203 [00:01<00:00, 170.37it/s, loss=2.7249]



Epoch 186:
Average Train Loss: 3.7953
Average Val Loss: 3.7271


Training Epoch 187: 100%|██████████| 811/811 [00:13<00:00, 61.27it/s, loss=4.1831]
Validation Epoch 187: 100%|██████████| 203/203 [00:01<00:00, 171.18it/s, loss=4.3870]



Epoch 187:
Average Train Loss: 3.7878
Average Val Loss: 3.7549


Training Epoch 188: 100%|██████████| 811/811 [00:12<00:00, 62.91it/s, loss=4.3020]
Validation Epoch 188: 100%|██████████| 203/203 [00:01<00:00, 171.25it/s, loss=3.5003]



Epoch 188:
Average Train Loss: 3.7923
Average Val Loss: 3.7224


Training Epoch 189: 100%|██████████| 811/811 [00:11<00:00, 69.29it/s, loss=3.4930]
Validation Epoch 189: 100%|██████████| 203/203 [00:01<00:00, 184.03it/s, loss=4.4491]



Epoch 189:
Average Train Loss: 3.7813
Average Val Loss: 3.7684


Training Epoch 190: 100%|██████████| 811/811 [00:13<00:00, 59.43it/s, loss=4.2160]
Validation Epoch 190: 100%|██████████| 203/203 [00:01<00:00, 183.72it/s, loss=2.6269]



Epoch 190:
Average Train Loss: 3.7777
Average Val Loss: 3.7960


Training Epoch 191: 100%|██████████| 811/811 [00:12<00:00, 65.44it/s, loss=3.4588]
Validation Epoch 191: 100%|██████████| 203/203 [00:01<00:00, 184.37it/s, loss=2.8288]



Epoch 191:
Average Train Loss: 3.8090
Average Val Loss: 3.7703


Training Epoch 192: 100%|██████████| 811/811 [00:10<00:00, 79.15it/s, loss=3.8383]
Validation Epoch 192: 100%|██████████| 203/203 [00:01<00:00, 185.57it/s, loss=4.4245]



Epoch 192:
Average Train Loss: 3.7561
Average Val Loss: 3.7296


Training Epoch 193: 100%|██████████| 811/811 [00:10<00:00, 79.15it/s, loss=3.9384]
Validation Epoch 193: 100%|██████████| 203/203 [00:01<00:00, 123.73it/s, loss=1.8064]



Epoch 193:
Average Train Loss: 3.8022
Average Val Loss: 3.7589


Training Epoch 194: 100%|██████████| 811/811 [00:10<00:00, 79.96it/s, loss=3.5980]
Validation Epoch 194: 100%|██████████| 203/203 [00:01<00:00, 185.29it/s, loss=2.4617]



Epoch 194:
Average Train Loss: 3.8053
Average Val Loss: 3.7843


Training Epoch 195: 100%|██████████| 811/811 [00:10<00:00, 78.75it/s, loss=3.8102]
Validation Epoch 195: 100%|██████████| 203/203 [00:01<00:00, 184.81it/s, loss=4.8530]



Epoch 195:
Average Train Loss: 3.7580
Average Val Loss: 3.8383


Training Epoch 196: 100%|██████████| 811/811 [00:10<00:00, 75.49it/s, loss=4.8805]
Validation Epoch 196: 100%|██████████| 203/203 [00:01<00:00, 185.30it/s, loss=4.1212]



Epoch 196:
Average Train Loss: 3.8000
Average Val Loss: 3.7912


Training Epoch 197: 100%|██████████| 811/811 [00:12<00:00, 67.10it/s, loss=2.4235]
Validation Epoch 197: 100%|██████████| 203/203 [00:01<00:00, 185.03it/s, loss=4.0022]



Epoch 197:
Average Train Loss: 3.7892
Average Val Loss: 3.7886


Training Epoch 198: 100%|██████████| 811/811 [00:10<00:00, 79.51it/s, loss=3.5039]
Validation Epoch 198: 100%|██████████| 203/203 [00:01<00:00, 185.40it/s, loss=4.4716]



Epoch 198:
Average Train Loss: 3.7887
Average Val Loss: 3.7113


Training Epoch 199: 100%|██████████| 811/811 [00:10<00:00, 79.40it/s, loss=4.4457]
Validation Epoch 199: 100%|██████████| 203/203 [00:01<00:00, 185.05it/s, loss=3.3322]


Epoch 199:
Average Train Loss: 3.7729
Average Val Loss: 3.7972





In [None]:
checkpoint = {
    'model_state_dict': model.state_dict(),
    'optimizer_state_dict': optimizer.state_dict(),
    'model_args': {
        'vocab_size': vocab_size,
        'embed_dim': embed_dim,
        'num_layers': num_layers,
        'num_heads': num_heads,
        'dim_feedforward': dim_feedforward,
        'num_fourier_features': num_fourier_features
    }
}

torch.save(checkpoint, 'model_checkpoint_3.58pt')

In [None]:
from model_arch import CategoricalScoreDiffusion

checkpoint = torch.load('model_checkpoint_2.65.pt')
model = CategoricalScoreDiffusion(**checkpoint['model_args'])
model.load_state_dict(checkpoint['model_state_dict'])
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])

In [None]:
# Access the learning rate
# Get the optimizer state dict
optimizer_state = checkpoint['optimizer_state_dict']
learning_rate = optimizer_state['param_groups'][0]['lr']
print(f"Learning rate: {learning_rate}")

Learning rate: 0.001


In [None]:
import time
from contextlib import contextmanager

@contextmanager
def timer(name):
    start = time.perf_counter()
    yield
    end = time.perf_counter()
    print(f"{name}: {(end - start)*1000:.2f} ms")