In [1]:
# v1: out_channels=512
# v2: out_channels=128 inceptionc -> 128

In [2]:
import numpy as np
import pandas as pd
import torch
from torch import Tensor
import torch.nn as nn
from typing import Callable, Any, Optional, Tuple, List
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torch.optim.lr_scheduler import CosineAnnealingLR, CosineAnnealingWarmRestarts, ReduceLROnPlateau
from sklearn.cluster import KMeans
from sklearn.model_selection import StratifiedKFold
import random
import os
from logging import getLogger, INFO, FileHandler, Formatter, StreamHandler
import copy
import time
from sklearn.metrics import mean_absolute_error

In [3]:
# InceptionA and InceptionC
# Copied from https://github.com/pytorch/vision/blob/master/torchvision/models/inception.py
# Copied from https://amaarora.github.io/2020/07/24/SeNet.html

class BasicConv1d(nn.Module):

    def __init__(
        self,
        in_channels: int,
        out_channels: int,
        **kwargs: Any
    ) -> None:
        super(BasicConv1d, self).__init__()
        self.conv = nn.Conv1d(in_channels, out_channels, bias=False, **kwargs)
        self.bn = nn.BatchNorm1d(out_channels, eps=0.001)

    def forward(self, x: Tensor) -> Tensor:
        x = self.conv(x)
        x = self.bn(x)
        return F.leaky_relu(x, inplace=True)

    
class InceptionA(nn.Module):

    def __init__(
        self,
        in_channels: int,
        pool_features: int,
        conv_block: Optional[Callable[..., nn.Module]] = None
    ) -> None:
        super(InceptionA, self).__init__()
        if conv_block is None:
            conv_block = BasicConv1d
        self.branch1x1 = conv_block(in_channels, 64, kernel_size=1)

        self.branch5x5_1 = conv_block(in_channels, 48, kernel_size=1)
        self.branch5x5_2 = conv_block(48, 64, kernel_size=5, padding=2)

        self.branch3x3dbl_1 = conv_block(in_channels, 64, kernel_size=1)
        self.branch3x3dbl_2 = conv_block(64, 96, kernel_size=3, padding=1)
        self.branch3x3dbl_3 = conv_block(96, 96, kernel_size=3, padding=1)

        self.branch_pool = conv_block(in_channels, pool_features, kernel_size=1)

    def _forward(self, x: Tensor) -> List[Tensor]:
        branch1x1 = self.branch1x1(x)

        branch5x5 = self.branch5x5_1(x)
        branch5x5 = self.branch5x5_2(branch5x5)

        branch3x3dbl = self.branch3x3dbl_1(x)
        branch3x3dbl = self.branch3x3dbl_2(branch3x3dbl)
        branch3x3dbl = self.branch3x3dbl_3(branch3x3dbl)

        branch_pool = F.avg_pool2d(x, kernel_size=3, stride=1, padding=1)
        branch_pool = self.branch_pool(branch_pool)

        outputs = [branch1x1, branch5x5, branch3x3dbl, branch_pool]
        return outputs

    def forward(self, x: Tensor) -> Tensor:
        outputs = self._forward(x)
        return torch.cat(outputs, 1)


class InceptionC(nn.Module):

    def __init__(
        self,
        in_channels: int,
        channels_7x7: int,
        conv_block: Optional[Callable[..., nn.Module]] = None
    ) -> None:
        super(InceptionC, self).__init__()
        if conv_block is None:
            conv_block = BasicConv1d
        self.branch1x1 = conv_block(in_channels, 128, kernel_size=1)

        c7 = channels_7x7
        self.branch7x7_1 = conv_block(in_channels, c7, kernel_size=1)
        self.branch7x7_2 = conv_block(c7, c7, kernel_size=1, padding=0)
        self.branch7x7_3 = conv_block(c7, 128, kernel_size=7, padding=3)

        self.branch7x7dbl_1 = conv_block(in_channels, c7, kernel_size=1)
        self.branch7x7dbl_2 = conv_block(c7, c7, kernel_size=7, padding=3,)
        self.branch7x7dbl_3 = conv_block(c7, c7, kernel_size=1, padding=0,)
        self.branch7x7dbl_4 = conv_block(c7, c7, kernel_size=7, padding=3,)
        self.branch7x7dbl_5 = conv_block(c7, 128, kernel_size=1, padding=0,)

        self.branch_pool = conv_block(in_channels, 128, kernel_size=1)

    def _forward(self, x: Tensor) -> List[Tensor]:
        branch1x1 = self.branch1x1(x)

        branch7x7 = self.branch7x7_1(x)
        branch7x7 = self.branch7x7_2(branch7x7)
        branch7x7 = self.branch7x7_3(branch7x7)

        branch7x7dbl = self.branch7x7dbl_1(x)
        branch7x7dbl = self.branch7x7dbl_2(branch7x7dbl)
        branch7x7dbl = self.branch7x7dbl_3(branch7x7dbl)
        branch7x7dbl = self.branch7x7dbl_4(branch7x7dbl)
        branch7x7dbl = self.branch7x7dbl_5(branch7x7dbl)

        branch_pool = F.avg_pool1d(x, kernel_size=3, stride=1, padding=1)
        branch_pool = self.branch_pool(branch_pool)

        outputs = [branch1x1, branch7x7, branch7x7dbl, branch_pool]
        return outputs

    def forward(self, x: Tensor) -> Tensor:
        outputs = self._forward(x)
        return torch.cat(outputs, 1)
    

class SE_Block(nn.Module):
    "credits: https://github.com/moskomule/senet.pytorch/blob/master/senet/se_module.py#L4"
    def __init__(self, c, r=16):
        super().__init__()
        self.squeeze = nn.AdaptiveAvgPool1d(1)
        self.excitation = nn.Sequential(
            nn.Linear(c, c // r, bias=False),
            nn.ReLU(inplace=True),
            nn.Linear(c // r, c, bias=False),
            nn.Sigmoid()
        )

    def forward(self, x):
        bs, c, _ = x.shape
        y = self.squeeze(x).view(bs, c)
        y = self.excitation(y).view(bs, c, 1)
        return x * y.expand_as(x)

In [4]:
class CFG:
    lr = 0.005
    min_lr = 1e-6
    weight_decay = 0.001
    epochs = 1200
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    seed = 42
    NFOLD = 5
    batch_size = 128
    DATA_ROOT = r'../input/2nd-solution-reproduce-feature-part/train'
    OUTPUT_DIR = r'./'
    scheduler='CosineAnnealingWarmRestarts'#
    target_col = 'time_to_eruption_normalize'
    model_name = '2nd_model'
    # for warm start
    T_0 = 10
    # for normal cosine
    T_max = 10
    num_workers = 1
    
    EARLY_STOP = True
    early_stop = 20
    
    gradient_accumulation_steps=1
    max_grad_norm = 1000
    print_freq = 20

In [5]:
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = True

seed_everything(CFG.seed)

In [6]:
class MyModel(nn.Module):
    def __init__(self, in_channels=340, out_channels=128, pool_features=64):
        super(MyModel, self).__init__()
        self.bn1 = nn.BatchNorm1d(in_channels)
        self.stem1 = nn.Sequential(nn.Conv1d(in_channels, out_channels, 3, padding=1), 
                                   nn.BatchNorm1d(out_channels), 
                                   # N,C,L => N,C,Lout
                                   nn.MaxPool1d(2,2))
        self.stem2 = nn.Sequential(nn.Conv1d(out_channels, out_channels, 3, padding=1), 
                                   nn.BatchNorm1d(out_channels), 
                                   nn.MaxPool1d(2,2))
        self.inceptiona = InceptionA(in_channels=out_channels, pool_features=pool_features)
        self.inceptionc = InceptionC(in_channels=out_channels, channels_7x7=out_channels)
        
        self.SE_Block = SE_Block(928)
        # (N, C, L)
        self.Bi_LSTM = nn.LSTM(input_size=928, 
                               hidden_size=128,
                               batch_first=True, 
                               bidirectional=True)
        self.adaptive_pooling = nn.AdaptiveAvgPool1d(1)
        self.fc = nn.Linear(29, 1)
        
    def forward(self, x):
        x = self.bn1(x)
        x = F.leaky_relu(x)
        x = self.stem1(x)
        x = F.leaky_relu(x)
        x = self.stem2(x)
        x = F.leaky_relu(x)
        x1 = self.inceptiona(x)
        x2 = self.inceptionc(x)
        x = torch.cat([x, x1, x2], dim=1)
        x = F.leaky_relu(x)
        x = self.SE_Block(x)
        x = F.leaky_relu(x)
        x = self.Bi_LSTM(x.permute(0,2,1))
        x = self.adaptive_pooling(x[0])
        x = F.relu(x)
        x = self.fc(x.squeeze(-1))
        
        return x

In [7]:
class INGVDataset(Dataset):
    def __init__(self, df, data_root, transforms=None, output_label=True):
        super().__init__()
        self.df = df
        self.data_root = data_root
        self.transforms = transforms
        self.output_label = output_label
    
    def __len__(self):
        return self.df.shape[0]
    
    def __getitem__(self, index: int):
        
        # get labels
        if self.output_label:
            #target = self.df.iloc[index]['label']
            target = [self.df.loc[index, 'time_to_eruption_normalize'].astype('float32')]
          
        path = os.path.join(self.data_root, str(self.df.loc[index, 'segment_id'])+'.npz')
        
        # (10, 256, 256)
        img = np.load(path)['arr_0'].astype('float32')
            
        # do label smoothing
        if self.output_label == True:
            return torch.tensor(img), torch.tensor(target)
        else:
            return torch.tensor(img)

In [8]:
df = pd.read_csv(r'../input/predict-volcanic-eruptions-ingv-oe/train.csv')
km = KMeans(n_clusters=5)
df['clusters'] = km.fit_predict(df['time_to_eruption'].values.reshape(-1,1))

# take one fold as example
skf = StratifiedKFold(n_splits=CFG.NFOLD)
df['fold'] = 0
time2eruption_mean = df['time_to_eruption'].mean(axis=0)
for trn_idx, val_idx in skf.split(df['clusters'].values.reshape(-1,1), df['clusters'].values.reshape(-1,1)):
    df.loc[trn_idx, 'fold'] = 1
    break

train_df = df.loc[df['fold'] == 1].reset_index(drop=True)
valid_df = df.loc[df['fold'] != 1].reset_index(drop=True)
train_df['time_to_eruption_normalize'] = train_df['time_to_eruption'] / time2eruption_mean
valid_df['time_to_eruption_normalize'] = valid_df['time_to_eruption'] / time2eruption_mean

In [9]:
# 记录训练过程
class AverageMeter(object):
    """Computes and stores the average and current value"""
    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        
        self.true_val = 0
        self.true_avg = 0
        self.true_sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count
        
    def true_update(self, val, n=1):
        self.true_val = val * time2eruption_mean
        self.true_sum += self.true_val * n
        self.count += n
        self.true_avg = self.true_sum / self.count
        
def asMinutes(s):
    m = s // 60
    s -= m * 60
    return '%dm %ds' % (m, s)

def timeSince(since, percent):
    now = time.time()
    s = now - since
    es = s / (percent)
    rs = es - s
    return '%s (remain %s)' % (asMinutes(s), asMinutes(rs))

class LogCoshLoss(torch.nn.Module):
    def __init__(self):
        super().__init__()

    def forward(self, y_t, y_prime_t):
        ey_t = y_t - y_prime_t
        return torch.mean(torch.log(torch.cosh(ey_t + 1e-12)))

In [10]:
def train_fn(train_loader, model, epoch, criterion, optimizer, scheduler, device):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    scores = AverageMeter()
    
    # switch to train mode
    model.train()
    
    start = end = time.time()
    global_step = 0
    for step, (images, labels) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)
        # torch.Size([32, 10, 256, 256])
        # print(images.shape)
        # torch.Size([32, 1])
        # print(labels.shape)
        images = images.to(device)
        labels = labels.to(device)
        batch_size = labels.size(0)
        y_preds = model(images)
        # torch.Size([32, 1])
        # print(y_preds.shape)
        loss = criterion(labels, y_preds)
        # record loss
        losses.update(loss.item(), batch_size)
        losses.true_update(loss.item(), batch_size)
        if CFG.gradient_accumulation_steps > 1:
            loss = loss / CFG.gradient_accumulation_steps
        loss.backward()
        
        grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), CFG.max_grad_norm)
        if (step + 1) % CFG.gradient_accumulation_steps == 0:
            optimizer.step()
            optimizer.zero_grad()
            global_step += 1
            
        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()
        
        if step % CFG.print_freq == 0 or step == (len(train_loader)-1):
            print('Epoch: [{0}][{1}/{2}] '
                  'Data {data_time.val:.3f} ({data_time.avg:.3f}) '
                  'Elapsed {remain:s} '
                  'Loss: {loss.val:.4f}({loss.avg:.4f}) {loss.true_val:.4f}({loss.true_avg:.4f})'
                  'Grad: {grad_norm:.4f}  '
                  #'LR: {lr:.6f}  '
                  .format(
                   epoch+1, step, len(train_loader), batch_time=batch_time,
                   data_time=data_time, loss=losses,
                   remain=timeSince(start, float(step+1)/len(train_loader)),
                   grad_norm=grad_norm,
                   #lr=scheduler.get_lr()[0],
                   ))
    return losses.avg


def valid_fn(valid_loader, model, criterion, device):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    scores = AverageMeter()
    # switch to evaluation mode
    model.eval()
    preds = []
    start = end = time.time()
    for step, (images, labels) in enumerate(valid_loader):
        # measure data loading time
        data_time.update(time.time() - end)
        images = images.to(device)
        labels = labels.to(device)
        batch_size = labels.size(0)
        # compute loss
        with torch.no_grad():
            y_preds = model(images)
        loss = criterion(labels, y_preds)
        losses.update(loss.item(), batch_size)
        losses.true_update(loss.item(), batch_size)
        # record accuracy
        # preds.append(y_preds.softmax(1).to('cpu').numpy())
        preds.append(y_preds.to('cpu').numpy())
        if CFG.gradient_accumulation_steps > 1:
            loss = loss / CFG.gradient_accumulation_steps
        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()
        if step % CFG.print_freq == 0 or step == (len(valid_loader)-1):
            print('EVAL: [{0}/{1}] '
                  'Data {data_time.val:.3f} ({data_time.avg:.3f}) '
                  'Elapsed {remain:s} '
                  'Loss: {loss.val:.4f}({loss.avg:.4f}) {loss.true_val:.4f}({loss.true_avg:.4f})'
                  .format(
                   step, len(valid_loader), batch_time=batch_time,
                   data_time=data_time, loss=losses,
                   remain=timeSince(start, float(step+1)/len(valid_loader)),
                   ))
    predictions = np.concatenate(preds)
    
    return losses.avg, predictions

def init_logger(log_file=CFG.OUTPUT_DIR+'train.log'):
    logger = getLogger(__name__)
    logger.setLevel(INFO)
    handler1 = StreamHandler()
    handler1.setFormatter(Formatter("%(message)s"))
    handler2 = FileHandler(filename=log_file)
    handler2.setFormatter(Formatter("%(message)s"))
    logger.addHandler(handler1)
    logger.addHandler(handler2)
    return logger

def get_score(y_true, y_pred):
    return mean_absolute_error(y_true, y_pred)

In [11]:
# ====================================================
# Train loop
# ====================================================

LOGGER = init_logger()

def train_loop(train_df, valid_df):

    LOGGER.info(f"========== start training ==========")
    
    # 存储结果
    res_valid_df = copy.deepcopy(valid_df)
    # ====================================================
    # loader
    # ====================================================

    train_dataset = INGVDataset(train_df, CFG.DATA_ROOT)
    valid_dataset = INGVDataset(valid_df, CFG.DATA_ROOT)

    train_loader = DataLoader(train_dataset, batch_size=CFG.batch_size, num_workers=CFG.num_workers, shuffle=True)
    valid_loader = DataLoader(valid_dataset, batch_size=CFG.batch_size, num_workers=CFG.num_workers, shuffle=False)
    
    # model
    model = MyModel()
    model.to(CFG.device)
    
    # optimizer
    optimizer = torch.optim.AdamW(model.parameters(), lr=CFG.lr, weight_decay=CFG.weight_decay)
    
    # ====================================================
    # scheduler 
    # ====================================================
    if CFG.scheduler=='CosineAnnealingLR':
        scheduler = CosineAnnealingLR(optimizer, T_max=CFG.T_max, eta_min=CFG.min_lr, last_epoch=-1)
    elif CFG.scheduler=='CosineAnnealingWarmRestarts':
        scheduler = CosineAnnealingWarmRestarts(optimizer, T_0=CFG.T_0, T_mult=2, eta_min=CFG.min_lr, last_epoch=-1)
    elif CFG.scheduler=='ReduceLROnPlateau':
        scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=10, verbose=True)
    else:
        raise Exception('scheduler not set')


    # ====================================================
    # loop
    # ====================================================
    criterion = nn.L1Loss()#LogCoshLoss()

    best_score = float('inf')
    best_loss = np.inf
    early_round = 0
    
    for epoch in range(CFG.epochs):
        
        start_time = time.time()
        
        # =================================================
        # train
        avg_loss = train_fn(train_loader, model, epoch, criterion, optimizer, scheduler, CFG.device)

        # eval
        avg_val_loss, preds = valid_fn(valid_loader, model, criterion, CFG.device)
        valid_labels = valid_df[CFG.target_col].values
        
        if isinstance(scheduler, torch.optim.lr_scheduler.CosineAnnealingLR):
            scheduler.step()
        elif isinstance(scheduler, torch.optim.lr_scheduler.CosineAnnealingWarmRestarts):
            scheduler.step()
        elif isinstance(scheduler, torch.optim.lr_scheduler.ReduceLROnPlateau):
            scheduler.step(avg_val_loss)

        # scoring
        score = get_score(valid_labels, preds)

        elapsed = time.time() - start_time

        LOGGER.info(f'Epoch {epoch+1} - avg_train_loss: {avg_loss:.4f}  avg_val_loss: {avg_val_loss:.4f}  time: {elapsed:.0f}s')
        LOGGER.info(f'Epoch {epoch+1} - MAE: {score}')
        
#         save_checkpoint(model, optimizer, scheduler, epoch, fold)
#         print('===================== checkpoint saved =======================')
        
        if score < best_score:
            best_score = score
            LOGGER.info(f'Epoch {epoch+1} - Save Best Score: {best_score:.4f} Model')
            torch.save({'model': model.state_dict(), 
                        'preds': preds},
                        CFG.OUTPUT_DIR+f'{CFG.model_name}_best.pth')
            early_round = 0
            
        else:
            if CFG.EARLY_STOP:
                early_round += 1
                if early_round == CFG.early_stop:
                    LOGGER.info('===================== Early Stop =====================')
                    break
    
    check_point = torch.load(CFG.OUTPUT_DIR+f'{CFG.model_name}_best.pth')
    res_valid_df['preds'] = check_point['preds']

    return res_valid_df

In [12]:
res_valid_df = train_loop(train_df, valid_df)



Epoch: [1][0/28] Data 3.521 (3.521) Elapsed 0m 4s (remain 1m 54s) Loss: 0.9866(0.9866) 22542517.2466(11271258.6233)Grad: 0.9689  
Epoch: [1][20/28] Data 1.860 (2.004) Elapsed 0m 43s (remain 0m 14s) Loss: 0.3154(0.2677) 7206442.4918(5970661.8479)Grad: 1.1613  
Epoch: [1][27/28] Data 1.298 (1.951) Elapsed 0m 57s (remain 0m 0s) Loss: 0.3016(0.2384) 6891297.2442(5379026.7094)Grad: 1.2545  
EVAL: [0/7] Data 2.209 (2.209) Elapsed 0m 2s (remain 0m 13s) Loss: 0.2596(0.2596) 5931135.9472(2965567.9736)


Epoch 1 - avg_train_loss: 0.2384  avg_val_loss: 0.1498  time: 71s
Epoch 1 - MAE: 0.27947450550788366
Epoch 1 - Save Best Score: 0.2795 Model


EVAL: [6/7] Data 1.818 (1.933) Elapsed 0m 13s (remain 0m 0s) Loss: 0.3265(0.1498) 7460053.6485(3192843.4554)
Epoch: [2][0/28] Data 0.289 (0.289) Elapsed 0m 0s (remain 0m 9s) Loss: 0.2070(0.2070) 4730457.8308(2365228.9154)Grad: 0.1206  
Epoch: [2][20/28] Data 0.184 (0.194) Elapsed 0m 5s (remain 0m 1s) Loss: 0.1707(0.1091) 3899817.4022(2433858.2671)Grad: 0.3485  
Epoch: [2][27/28] Data 0.109 (0.188) Elapsed 0m 6s (remain 0m 0s) Loss: 0.1730(0.1046) 3952217.2232(2361329.4962)Grad: 0.2216  
EVAL: [0/7] Data 0.274 (0.274) Elapsed 0m 0s (remain 0m 1s) Loss: 0.1950(0.1950) 4454763.1092(2227381.5546)


Epoch 2 - avg_train_loss: 0.1046  avg_val_loss: 0.1061  time: 9s
Epoch 2 - MAE: 0.19791761063278251
Epoch 2 - Save Best Score: 0.1979 Model


EVAL: [6/7] Data 0.196 (0.252) Elapsed 0m 1s (remain 0m 0s) Loss: 0.2414(0.1061) 5515169.5109(2261100.5275)
Epoch: [3][0/28] Data 0.407 (0.407) Elapsed 0m 0s (remain 0m 13s) Loss: 0.1501(0.1501) 3430111.3748(1715055.6874)Grad: 0.3304  
Epoch: [3][20/28] Data 0.201 (0.195) Elapsed 0m 5s (remain 0m 1s) Loss: 0.1594(0.0797) 3643215.5109(1776845.3130)Grad: 0.1828  
Epoch: [3][27/28] Data 0.108 (0.190) Elapsed 0m 6s (remain 0m 0s) Loss: 0.1547(0.0773) 3535328.4394(1744324.6310)Grad: 0.1207  
EVAL: [0/7] Data 0.287 (0.287) Elapsed 0m 0s (remain 0m 1s) Loss: 0.1493(0.1493) 3411891.8636(1705945.9318)


Epoch 3 - avg_train_loss: 0.0773  avg_val_loss: 0.0798  time: 9s
Epoch 3 - MAE: 0.14886595625043494
Epoch 3 - Save Best Score: 0.1489 Model


EVAL: [6/7] Data 0.210 (0.229) Elapsed 0m 1s (remain 0m 0s) Loss: 0.1749(0.0798) 3995583.2145(1700712.2095)
Epoch: [4][0/28] Data 0.308 (0.308) Elapsed 0m 0s (remain 0m 9s) Loss: 0.1307(0.1307) 2985675.4201(1492837.7100)Grad: 0.7409  
Epoch: [4][20/28] Data 0.182 (0.202) Elapsed 0m 5s (remain 0m 1s) Loss: 0.1204(0.0639) 2750201.0391(1425117.2894)Grad: 0.4709  
Epoch: [4][27/28] Data 0.131 (0.198) Elapsed 0m 7s (remain 0m 0s) Loss: 0.1176(0.0638) 2686315.6262(1440118.5037)Grad: 0.3274  
EVAL: [0/7] Data 0.294 (0.294) Elapsed 0m 0s (remain 0m 1s) Loss: 0.1183(0.1183) 2703505.5403(1351752.7702)


Epoch 4 - avg_train_loss: 0.0638  avg_val_loss: 0.0690  time: 9s
Epoch 4 - MAE: 0.1286896674390372
Epoch 4 - Save Best Score: 0.1287 Model


EVAL: [6/7] Data 0.201 (0.236) Elapsed 0m 1s (remain 0m 0s) Loss: 0.1278(0.0690) 2919609.6037(1470209.1234)
Epoch: [5][0/28] Data 0.291 (0.291) Elapsed 0m 0s (remain 0m 9s) Loss: 0.1219(0.1219) 2785291.0986(1392645.5493)Grad: 0.9893  
Epoch: [5][20/28] Data 0.190 (0.201) Elapsed 0m 5s (remain 0m 1s) Loss: 0.0909(0.0568) 2075969.3196(1266631.8110)Grad: 0.1207  
Epoch: [5][27/28] Data 0.114 (0.195) Elapsed 0m 7s (remain 0m 0s) Loss: 0.1244(0.0562) 2841751.0869(1267576.3513)Grad: 0.7257  
EVAL: [0/7] Data 0.282 (0.282) Elapsed 0m 0s (remain 0m 1s) Loss: 0.1014(0.1014) 2317053.7112(1158526.8556)


Epoch 5 - avg_train_loss: 0.0562  avg_val_loss: 0.0582  time: 9s
Epoch 5 - MAE: 0.10859769884931895
Epoch 5 - Save Best Score: 0.1086 Model


EVAL: [6/7] Data 0.201 (0.227) Elapsed 0m 1s (remain 0m 0s) Loss: 0.1081(0.0582) 2470123.5499(1240669.4045)
Epoch: [6][0/28] Data 0.289 (0.289) Elapsed 0m 0s (remain 0m 9s) Loss: 0.1045(0.1045) 2387877.1584(1193938.5792)Grad: 0.1686  
Epoch: [6][20/28] Data 0.180 (0.199) Elapsed 0m 5s (remain 0m 1s) Loss: 0.1001(0.0509) 2287126.1075(1136262.6565)Grad: 0.1578  
Epoch: [6][27/28] Data 0.118 (0.198) Elapsed 0m 7s (remain 0m 0s) Loss: 0.1049(0.0495) 2397378.1200(1115963.8223)Grad: 0.4460  
EVAL: [0/7] Data 0.289 (0.289) Elapsed 0m 0s (remain 0m 1s) Loss: 0.1013(0.1013) 2315728.7518(1157864.3759)


Epoch 6 - avg_train_loss: 0.0495  avg_val_loss: 0.0551  time: 9s
Epoch 6 - MAE: 0.10274440250701766
Epoch 6 - Save Best Score: 0.1027 Model


EVAL: [6/7] Data 0.201 (0.233) Elapsed 0m 1s (remain 0m 0s) Loss: 0.1078(0.0551) 2462620.4970(1173798.6351)
Epoch: [7][0/28] Data 0.284 (0.284) Elapsed 0m 0s (remain 0m 9s) Loss: 0.0828(0.0828) 1892631.2318(946315.6159)Grad: 0.1909  
Epoch: [7][20/28] Data 0.195 (0.197) Elapsed 0m 5s (remain 0m 1s) Loss: 0.1118(0.0419) 2554012.0536(933826.6232)Grad: 1.7710  
Epoch: [7][27/28] Data 0.117 (0.194) Elapsed 0m 7s (remain 0m 0s) Loss: 0.0985(0.0413) 2249965.9585(931310.2849)Grad: 0.6280  
EVAL: [0/7] Data 0.284 (0.284) Elapsed 0m 0s (remain 0m 1s) Loss: 0.0853(0.0853) 1949701.6921(974850.8461)


Epoch 7 - avg_train_loss: 0.0413  avg_val_loss: 0.0460  time: 9s
Epoch 7 - MAE: 0.08579412112950219
Epoch 7 - Save Best Score: 0.0858 Model


EVAL: [6/7] Data 0.210 (0.248) Elapsed 0m 1s (remain 0m 0s) Loss: 0.0799(0.0460) 1826004.6527(980150.9292)
Epoch: [8][0/28] Data 0.280 (0.280) Elapsed 0m 0s (remain 0m 9s) Loss: 0.0589(0.0589) 1346102.2430(673051.1215)Grad: 0.2250  
Epoch: [8][20/28] Data 0.202 (0.199) Elapsed 0m 5s (remain 0m 1s) Loss: 0.0689(0.0382) 1574427.6653(852004.2287)Grad: 0.2921  
Epoch: [8][27/28] Data 0.116 (0.195) Elapsed 0m 7s (remain 0m 0s) Loss: 0.0679(0.0380) 1552080.5727(858453.2183)Grad: 0.8156  
EVAL: [0/7] Data 0.290 (0.290) Elapsed 0m 0s (remain 0m 1s) Loss: 0.0807(0.0807) 1844584.5570(922292.2785)


Epoch 8 - avg_train_loss: 0.0380  avg_val_loss: 0.0453  time: 9s
Epoch 8 - MAE: 0.08443447437329264
Epoch 8 - Save Best Score: 0.0844 Model


EVAL: [6/7] Data 0.205 (0.230) Elapsed 0m 1s (remain 0m 0s) Loss: 0.0898(0.0453) 2050738.5719(964617.7547)
Epoch: [9][0/28] Data 0.289 (0.289) Elapsed 0m 0s (remain 0m 9s) Loss: 0.0641(0.0641) 1464516.4685(732258.2343)Grad: 0.3247  
Epoch: [9][20/28] Data 0.247 (0.217) Elapsed 0m 5s (remain 0m 1s) Loss: 0.0607(0.0346) 1387991.5932(771247.4427)Grad: 0.6279  
Epoch: [9][27/28] Data 0.136 (0.220) Elapsed 0m 7s (remain 0m 0s) Loss: 0.0751(0.0347) 1716256.7138(783130.4777)Grad: 0.2617  
EVAL: [0/7] Data 0.328 (0.328) Elapsed 0m 0s (remain 0m 2s) Loss: 0.0722(0.0722) 1648784.5645(824392.2823)


Epoch 9 - avg_train_loss: 0.0347  avg_val_loss: 0.0414  time: 10s
Epoch 9 - MAE: 0.07724069613096225
Epoch 9 - Save Best Score: 0.0772 Model


EVAL: [6/7] Data 0.217 (0.271) Elapsed 0m 2s (remain 0m 0s) Loss: 0.0815(0.0414) 1862324.8494(882432.7295)
Epoch: [10][0/28] Data 0.304 (0.304) Elapsed 0m 0s (remain 0m 9s) Loss: 0.0601(0.0601) 1373900.3443(686950.1721)Grad: 0.4404  
Epoch: [10][20/28] Data 0.182 (0.200) Elapsed 0m 5s (remain 0m 1s) Loss: 0.0888(0.0336) 2029383.9646(750015.5909)Grad: 1.4402  
Epoch: [10][27/28] Data 0.133 (0.198) Elapsed 0m 7s (remain 0m 0s) Loss: 0.0728(0.0325) 1663177.4745(733514.8064)Grad: 0.8215  
EVAL: [0/7] Data 0.299 (0.299) Elapsed 0m 0s (remain 0m 1s) Loss: 0.0699(0.0699) 1596196.9715(798098.4857)


Epoch 10 - avg_train_loss: 0.0325  avg_val_loss: 0.0400  time: 9s
Epoch 10 - MAE: 0.07455604473952938
Epoch 10 - Save Best Score: 0.0746 Model


EVAL: [6/7] Data 0.200 (0.235) Elapsed 0m 1s (remain 0m 0s) Loss: 0.0738(0.0400) 1687040.8736(851762.0658)
Epoch: [11][0/28] Data 0.285 (0.285) Elapsed 0m 0s (remain 0m 9s) Loss: 0.0609(0.0609) 1391707.7954(695853.8977)Grad: 0.5352  
Epoch: [11][20/28] Data 0.183 (0.199) Elapsed 0m 5s (remain 0m 1s) Loss: 0.0803(0.0422) 1835545.4500(941064.9323)Grad: 0.3397  
Epoch: [11][27/28] Data 0.127 (0.195) Elapsed 0m 7s (remain 0m 0s) Loss: 0.0947(0.0431) 2164697.5087(973185.2589)Grad: 0.5839  
EVAL: [0/7] Data 0.309 (0.309) Elapsed 0m 0s (remain 0m 1s) Loss: 0.1068(0.1068) 2439964.9338(1219982.4669)


Epoch 11 - avg_train_loss: 0.0431  avg_val_loss: 0.0593  time: 9s
Epoch 11 - MAE: 0.11063134304532715


EVAL: [6/7] Data 0.201 (0.240) Elapsed 0m 1s (remain 0m 0s) Loss: 0.1271(0.0593) 2903469.3748(1263902.6438)
Epoch: [12][0/28] Data 0.296 (0.296) Elapsed 0m 0s (remain 0m 9s) Loss: 0.0937(0.0937) 2140942.8914(1070471.4457)Grad: 0.2335  
Epoch: [12][20/28] Data 0.187 (0.191) Elapsed 0m 5s (remain 0m 1s) Loss: 0.1009(0.0473) 2306208.3831(1055854.5597)Grad: 1.6088  
Epoch: [12][27/28] Data 0.114 (0.193) Elapsed 0m 7s (remain 0m 0s) Loss: 0.0842(0.0453) 1924702.1271(1022765.8332)Grad: 1.3309  
EVAL: [0/7] Data 0.290 (0.290) Elapsed 0m 0s (remain 0m 1s) Loss: 0.1029(0.1029) 2352165.3910(1176082.6955)


Epoch 12 - avg_train_loss: 0.0453  avg_val_loss: 0.0550  time: 9s
Epoch 12 - MAE: 0.10264270250198385


EVAL: [6/7] Data 0.214 (0.234) Elapsed 0m 1s (remain 0m 0s) Loss: 0.0993(0.0550) 2270014.1623(1172636.7828)
Epoch: [13][0/28] Data 0.323 (0.323) Elapsed 0m 0s (remain 0m 10s) Loss: 0.0767(0.0767) 1751924.0492(875962.0246)Grad: 0.2382  
Epoch: [13][20/28] Data 0.242 (0.208) Elapsed 0m 5s (remain 0m 1s) Loss: 0.0728(0.0455) 1663764.6241(1014966.6850)Grad: 0.3916  
Epoch: [13][27/28] Data 0.124 (0.204) Elapsed 0m 7s (remain 0m 0s) Loss: 0.0864(0.0436) 1973193.1221(984701.4282)Grad: 0.9501  
EVAL: [0/7] Data 0.297 (0.297) Elapsed 0m 0s (remain 0m 1s) Loss: 0.0901(0.0901) 2057995.6315(1028997.8158)


Epoch 13 - avg_train_loss: 0.0436  avg_val_loss: 0.0481  time: 10s
Epoch 13 - MAE: 0.0896862916782154


EVAL: [6/7] Data 0.232 (0.268) Elapsed 0m 2s (remain 0m 0s) Loss: 0.1029(0.0481) 2352179.6909(1024616.8791)
Epoch: [14][0/28] Data 0.302 (0.302) Elapsed 0m 0s (remain 0m 9s) Loss: 0.0689(0.0689) 1574464.7771(787232.3886)Grad: 0.3594  
Epoch: [14][20/28] Data 0.189 (0.200) Elapsed 0m 5s (remain 0m 1s) Loss: 0.0621(0.0377) 1417831.3543(840865.7594)Grad: 0.7486  
Epoch: [14][27/28] Data 0.122 (0.199) Elapsed 0m 7s (remain 0m 0s) Loss: 0.0685(0.0367) 1564865.0776(827497.0197)Grad: 0.0894  
EVAL: [0/7] Data 0.293 (0.293) Elapsed 0m 0s (remain 0m 1s) Loss: 0.0825(0.0825) 1885650.2976(942825.1488)


Epoch 14 - avg_train_loss: 0.0367  avg_val_loss: 0.0433  time: 9s
Epoch 14 - MAE: 0.08070607631326528


EVAL: [6/7] Data 0.234 (0.243) Elapsed 0m 1s (remain 0m 0s) Loss: 0.0994(0.0433) 2270196.9975(922022.8114)
Epoch: [15][0/28] Data 0.313 (0.313) Elapsed 0m 0s (remain 0m 10s) Loss: 0.0680(0.0680) 1553110.1698(776555.0849)Grad: 0.8117  
Epoch: [15][20/28] Data 0.196 (0.209) Elapsed 0m 5s (remain 0m 1s) Loss: 0.0523(0.0313) 1194863.3099(698802.0100)Grad: 0.4401  
Epoch: [15][27/28] Data 0.118 (0.203) Elapsed 0m 7s (remain 0m 0s) Loss: 0.0711(0.0310) 1625530.4458(699873.4697)Grad: 0.2330  
EVAL: [0/7] Data 0.277 (0.277) Elapsed 0m 0s (remain 0m 1s) Loss: 0.0862(0.0862) 1968862.7877(984431.3938)


Epoch 15 - avg_train_loss: 0.0310  avg_val_loss: 0.0394  time: 9s
Epoch 15 - MAE: 0.07347575997435452
Epoch 15 - Save Best Score: 0.0735 Model


EVAL: [6/7] Data 0.206 (0.232) Elapsed 0m 1s (remain 0m 0s) Loss: 0.0732(0.0394) 1671842.3992(839420.3854)
Epoch: [16][0/28] Data 0.292 (0.292) Elapsed 0m 0s (remain 0m 9s) Loss: 0.0480(0.0480) 1096195.1164(548097.5582)Grad: 0.3359  
Epoch: [16][20/28] Data 0.202 (0.208) Elapsed 0m 5s (remain 0m 1s) Loss: 0.0681(0.0311) 1556274.0360(694548.9554)Grad: 0.2964  
Epoch: [16][27/28] Data 0.116 (0.203) Elapsed 0m 7s (remain 0m 0s) Loss: 0.0602(0.0309) 1375295.7821(696976.1457)Grad: 0.9609  
EVAL: [0/7] Data 0.284 (0.284) Elapsed 0m 0s (remain 0m 1s) Loss: 0.0733(0.0733) 1674903.9526(837451.9763)


Epoch 16 - avg_train_loss: 0.0309  avg_val_loss: 0.0372  time: 9s
Epoch 16 - MAE: 0.06946000759390071
Epoch 16 - Save Best Score: 0.0695 Model


EVAL: [6/7] Data 0.201 (0.259) Elapsed 0m 1s (remain 0m 0s) Loss: 0.0830(0.0372) 1897445.3815(793542.6391)
Epoch: [17][0/28] Data 0.311 (0.311) Elapsed 0m 0s (remain 0m 10s) Loss: 0.0640(0.0640) 1462392.2435(731196.1217)Grad: 0.5996  
Epoch: [17][20/28] Data 0.194 (0.206) Elapsed 0m 5s (remain 0m 1s) Loss: 0.0499(0.0281) 1139344.8209(626652.3875)Grad: 0.6853  
Epoch: [17][27/28] Data 0.124 (0.202) Elapsed 0m 7s (remain 0m 0s) Loss: 0.0464(0.0276) 1059510.1857(623175.7785)Grad: 0.0662  
EVAL: [0/7] Data 0.283 (0.283) Elapsed 0m 0s (remain 0m 1s) Loss: 0.0633(0.0633) 1447413.5458(723706.7729)


Epoch 17 - avg_train_loss: 0.0276  avg_val_loss: 0.0323  time: 9s
Epoch 17 - MAE: 0.060270558352432335
Epoch 17 - Save Best Score: 0.0603 Model


EVAL: [6/7] Data 0.202 (0.231) Elapsed 0m 1s (remain 0m 0s) Loss: 0.0627(0.0323) 1433279.5673(688558.1962)
Epoch: [18][0/28] Data 0.292 (0.292) Elapsed 0m 0s (remain 0m 9s) Loss: 0.0514(0.0514) 1173681.8343(586840.9172)Grad: 0.2225  
Epoch: [18][20/28] Data 0.197 (0.200) Elapsed 0m 5s (remain 0m 1s) Loss: 0.0467(0.0249) 1066246.5735(555610.1445)Grad: 0.2945  
Epoch: [18][27/28] Data 0.124 (0.200) Elapsed 0m 7s (remain 0m 0s) Loss: 0.0653(0.0245) 1491839.0961(553827.7825)Grad: 0.3805  
EVAL: [0/7] Data 0.290 (0.290) Elapsed 0m 0s (remain 0m 1s) Loss: 0.0562(0.0562) 1283295.4557(641647.7279)


Epoch 18 - avg_train_loss: 0.0245  avg_val_loss: 0.0308  time: 9s
Epoch 18 - MAE: 0.05750835027845511
Epoch 18 - Save Best Score: 0.0575 Model


EVAL: [6/7] Data 0.204 (0.235) Elapsed 0m 1s (remain 0m 0s) Loss: 0.0599(0.0308) 1367716.3776(657001.4831)
Epoch: [19][0/28] Data 0.276 (0.276) Elapsed 0m 0s (remain 0m 9s) Loss: 0.0528(0.0528) 1206191.9427(603095.9714)Grad: 0.4234  
Epoch: [19][20/28] Data 0.190 (0.194) Elapsed 0m 5s (remain 0m 1s) Loss: 0.0491(0.0233) 1121334.1061(520626.2828)Grad: 1.3366  
Epoch: [19][27/28] Data 0.109 (0.190) Elapsed 0m 6s (remain 0m 0s) Loss: 0.0502(0.0232) 1146769.9050(523772.8321)Grad: 1.0288  
EVAL: [0/7] Data 0.289 (0.289) Elapsed 0m 0s (remain 0m 1s) Loss: 0.0537(0.0537) 1226131.1944(613065.5972)


Epoch 19 - avg_train_loss: 0.0232  avg_val_loss: 0.0285  time: 9s
Epoch 19 - MAE: 0.053175766236972684
Epoch 19 - Save Best Score: 0.0532 Model


EVAL: [6/7] Data 0.207 (0.251) Elapsed 0m 1s (remain 0m 0s) Loss: 0.0567(0.0285) 1294823.6070(607504.0883)
Epoch: [20][0/28] Data 0.330 (0.330) Elapsed 0m 0s (remain 0m 10s) Loss: 0.0382(0.0382) 873290.3154(436645.1577)Grad: 0.0883  
Epoch: [20][20/28] Data 0.207 (0.216) Elapsed 0m 5s (remain 0m 1s) Loss: 0.0523(0.0229) 1194666.1748(510787.9223)Grad: 1.0246  
Epoch: [20][27/28] Data 0.107 (0.207) Elapsed 0m 7s (remain 0m 0s) Loss: 0.0456(0.0226) 1042140.9262(509014.0733)Grad: 0.9533  
EVAL: [0/7] Data 0.291 (0.291) Elapsed 0m 0s (remain 0m 1s) Loss: 0.0511(0.0511) 1167201.9095(583600.9547)


Epoch 20 - avg_train_loss: 0.0226  avg_val_loss: 0.0274  time: 9s
Epoch 20 - MAE: 0.0510959420617084
Epoch 20 - Save Best Score: 0.0511 Model


EVAL: [6/7] Data 0.202 (0.228) Elapsed 0m 1s (remain 0m 0s) Loss: 0.0537(0.0274) 1226423.0668(583743.2145)
Epoch: [21][0/28] Data 0.301 (0.301) Elapsed 0m 0s (remain 0m 9s) Loss: 0.0449(0.0449) 1026189.4908(513094.7454)Grad: 1.2499  
Epoch: [21][20/28] Data 0.229 (0.203) Elapsed 0m 5s (remain 0m 1s) Loss: 0.0403(0.0208) 921116.3621(463354.6583)Grad: 0.9607  
Epoch: [21][27/28] Data 0.123 (0.200) Elapsed 0m 7s (remain 0m 0s) Loss: 0.0521(0.0203) 1189408.8114(458519.3584)Grad: 0.3720  
EVAL: [0/7] Data 0.301 (0.301) Elapsed 0m 0s (remain 0m 1s) Loss: 0.0511(0.0511) 1168167.6675(584083.8338)


Epoch 21 - avg_train_loss: 0.0203  avg_val_loss: 0.0265  time: 9s
Epoch 21 - MAE: 0.04946529443661711
Epoch 21 - Save Best Score: 0.0495 Model


EVAL: [6/7] Data 0.208 (0.240) Elapsed 0m 1s (remain 0m 0s) Loss: 0.0475(0.0265) 1085168.4001(565113.9605)
Epoch: [22][0/28] Data 0.320 (0.320) Elapsed 0m 0s (remain 0m 10s) Loss: 0.0408(0.0408) 932312.2947(466156.1474)Grad: 0.5445  
Epoch: [22][20/28] Data 0.188 (0.210) Elapsed 0m 5s (remain 0m 1s) Loss: 0.0357(0.0198) 815528.5200(442692.6759)Grad: 0.6643  
Epoch: [22][27/28] Data 0.122 (0.205) Elapsed 0m 7s (remain 0m 0s) Loss: 0.0418(0.0191) 955988.2622(431928.3978)Grad: 0.7021  
EVAL: [0/7] Data 0.295 (0.295) Elapsed 0m 0s (remain 0m 1s) Loss: 0.0499(0.0499) 1141132.4861(570566.2431)


Epoch 22 - avg_train_loss: 0.0191  avg_val_loss: 0.0258  time: 9s
Epoch 22 - MAE: 0.04820910214380656
Epoch 22 - Save Best Score: 0.0482 Model


EVAL: [6/7] Data 0.222 (0.239) Elapsed 0m 1s (remain 0m 0s) Loss: 0.0467(0.0258) 1067625.6685(550762.6576)
Epoch: [23][0/28] Data 0.299 (0.299) Elapsed 0m 0s (remain 0m 9s) Loss: 0.0360(0.0360) 821894.8112(410947.4056)Grad: 0.7502  
Epoch: [23][20/28] Data 0.200 (0.207) Elapsed 0m 5s (remain 0m 1s) Loss: 0.0412(0.0190) 940377.7276(423179.4682)Grad: 1.2259  
Epoch: [23][27/28] Data 0.145 (0.205) Elapsed 0m 7s (remain 0m 0s) Loss: 0.0357(0.0186) 816684.9442(419792.7583)Grad: 0.0754  
EVAL: [0/7] Data 0.382 (0.382) Elapsed 0m 0s (remain 0m 2s) Loss: 0.0462(0.0462) 1055184.3626(527592.1813)


Epoch 23 - avg_train_loss: 0.0186  avg_val_loss: 0.0243  time: 10s
Epoch 23 - MAE: 0.0453864131119431
Epoch 23 - Save Best Score: 0.0454 Model


EVAL: [6/7] Data 0.213 (0.259) Elapsed 0m 1s (remain 0m 0s) Loss: 0.0486(0.0243) 1110891.1345(518514.9645)
Epoch: [24][0/28] Data 0.308 (0.308) Elapsed 0m 0s (remain 0m 10s) Loss: 0.0333(0.0333) 761188.8413(380594.4207)Grad: 1.2507  
Epoch: [24][20/28] Data 0.303 (0.208) Elapsed 0m 5s (remain 0m 1s) Loss: 0.0308(0.0173) 703761.3927(386322.9967)Grad: 0.1318  
Epoch: [24][27/28] Data 0.142 (0.206) Elapsed 0m 7s (remain 0m 0s) Loss: 0.0387(0.0168) 884488.9717(379034.4983)Grad: 1.0888  
EVAL: [0/7] Data 0.308 (0.308) Elapsed 0m 0s (remain 0m 1s) Loss: 0.0431(0.0431) 985292.9664(492646.4832)


Epoch 24 - avg_train_loss: 0.0168  avg_val_loss: 0.0224  time: 9s
Epoch 24 - MAE: 0.041709046063828505
Epoch 24 - Save Best Score: 0.0417 Model


EVAL: [6/7] Data 0.224 (0.248) Elapsed 0m 1s (remain 0m 0s) Loss: 0.0413(0.0224) 944039.8794(476503.0659)
Epoch: [25][0/28] Data 0.306 (0.306) Elapsed 0m 0s (remain 0m 10s) Loss: 0.0302(0.0302) 691143.5078(345571.7539)Grad: 0.3851  
Epoch: [25][20/28] Data 0.675 (0.368) Elapsed 0m 8s (remain 0m 2s) Loss: 0.0251(0.0156) 573030.5603(348265.6143)Grad: 0.1098  
Epoch: [25][27/28] Data 0.126 (0.419) Elapsed 0m 13s (remain 0m 0s) Loss: 0.0407(0.0156) 929464.3044(351422.4668)Grad: 0.8798  
EVAL: [0/7] Data 0.531 (0.531) Elapsed 0m 0s (remain 0m 3s) Loss: 0.0441(0.0441) 1006519.1294(503259.5647)


Epoch 25 - avg_train_loss: 0.0156  avg_val_loss: 0.0218  time: 15s
Epoch 25 - MAE: 0.04074004333868658
Epoch 25 - Save Best Score: 0.0407 Model


EVAL: [6/7] Data 0.227 (0.275) Elapsed 0m 2s (remain 0m 0s) Loss: 0.0406(0.0218) 927701.6641(465432.7376)
Epoch: [26][0/28] Data 0.312 (0.312) Elapsed 0m 0s (remain 0m 10s) Loss: 0.0273(0.0273) 624395.8473(312197.9236)Grad: 0.0963  
Epoch: [26][20/28] Data 0.198 (0.217) Elapsed 0m 5s (remain 0m 1s) Loss: 0.0226(0.0157) 516215.2861(350051.1678)Grad: 0.6601  
Epoch: [26][27/28] Data 0.113 (0.209) Elapsed 0m 7s (remain 0m 0s) Loss: 0.0320(0.0153) 731424.7509(345697.8364)Grad: 0.3296  
EVAL: [0/7] Data 0.295 (0.295) Elapsed 0m 0s (remain 0m 1s) Loss: 0.0392(0.0392) 895737.2523(447868.6262)


Epoch 26 - avg_train_loss: 0.0153  avg_val_loss: 0.0216  time: 10s
Epoch 26 - MAE: 0.0402888528676457
Epoch 26 - Save Best Score: 0.0403 Model


EVAL: [6/7] Data 0.216 (0.239) Elapsed 0m 1s (remain 0m 0s) Loss: 0.0395(0.0216) 902542.7566(460278.1285)
Epoch: [27][0/28] Data 0.307 (0.307) Elapsed 0m 0s (remain 0m 9s) Loss: 0.0297(0.0297) 678224.7280(339112.3640)Grad: 0.0937  
Epoch: [27][20/28] Data 0.206 (0.209) Elapsed 0m 5s (remain 0m 1s) Loss: 0.0279(0.0143) 638314.9008(319819.5481)Grad: 0.2847  
Epoch: [27][27/28] Data 0.119 (0.204) Elapsed 0m 7s (remain 0m 0s) Loss: 0.0322(0.0142) 736225.6220(320402.4294)Grad: 1.1005  
EVAL: [0/7] Data 0.303 (0.303) Elapsed 0m 0s (remain 0m 1s) Loss: 0.0397(0.0397) 906269.5987(453134.7993)


Epoch 27 - avg_train_loss: 0.0142  avg_val_loss: 0.0207  time: 9s
Epoch 27 - MAE: 0.03869049876619728
Epoch 27 - Save Best Score: 0.0387 Model


EVAL: [6/7] Data 0.212 (0.238) Elapsed 0m 1s (remain 0m 0s) Loss: 0.0405(0.0207) 925663.3239(442017.7984)
Epoch: [28][0/28] Data 0.296 (0.296) Elapsed 0m 0s (remain 0m 9s) Loss: 0.0226(0.0226) 516664.7135(258332.3567)Grad: 0.3070  
Epoch: [28][20/28] Data 0.199 (0.202) Elapsed 0m 5s (remain 0m 1s) Loss: 0.0257(0.0129) 586918.9285(288504.8493)Grad: 0.7502  
Epoch: [28][27/28] Data 0.127 (0.206) Elapsed 0m 7s (remain 0m 0s) Loss: 0.0315(0.0129) 720067.8586(291680.0379)Grad: 1.1009  
EVAL: [0/7] Data 0.297 (0.297) Elapsed 0m 0s (remain 0m 1s) Loss: 0.0376(0.0376) 859391.3498(429695.6749)


Epoch 28 - avg_train_loss: 0.0129  avg_val_loss: 0.0203  time: 9s
Epoch 28 - MAE: 0.0378995470984182
Epoch 28 - Save Best Score: 0.0379 Model


EVAL: [6/7] Data 0.217 (0.239) Elapsed 0m 1s (remain 0m 0s) Loss: 0.0388(0.0203) 886009.7895(432981.6107)
Epoch: [29][0/28] Data 0.302 (0.302) Elapsed 0m 0s (remain 0m 9s) Loss: 0.0253(0.0253) 578350.5711(289175.2856)Grad: 0.9563  
Epoch: [29][20/28] Data 0.216 (0.203) Elapsed 0m 5s (remain 0m 1s) Loss: 0.0271(0.0136) 619812.4546(302716.9547)Grad: 0.2300  
Epoch: [29][27/28] Data 0.143 (0.202) Elapsed 0m 7s (remain 0m 0s) Loss: 0.0269(0.0133) 613929.7659(299437.8684)Grad: 0.2435  
EVAL: [0/7] Data 0.441 (0.441) Elapsed 0m 0s (remain 0m 2s) Loss: 0.0389(0.0389) 889831.5390(444915.7695)


Epoch 29 - avg_train_loss: 0.0133  avg_val_loss: 0.0204  time: 9s
Epoch 29 - MAE: 0.0380172430878418


EVAL: [6/7] Data 0.239 (0.278) Elapsed 0m 2s (remain 0m 0s) Loss: 0.0376(0.0204) 859266.8210(434326.2234)
Epoch: [30][0/28] Data 0.529 (0.529) Elapsed 0m 0s (remain 0m 17s) Loss: 0.0284(0.0284) 648377.7318(324188.8659)Grad: 0.3310  
Epoch: [30][20/28] Data 0.200 (0.222) Elapsed 0m 5s (remain 0m 1s) Loss: 0.0248(0.0127) 567706.6340(282813.3729)Grad: 0.9818  
Epoch: [30][27/28] Data 0.119 (0.214) Elapsed 0m 7s (remain 0m 0s) Loss: 0.0332(0.0127) 758189.0842(286470.6461)Grad: 0.4830  
EVAL: [0/7] Data 0.308 (0.308) Elapsed 0m 0s (remain 0m 1s) Loss: 0.0386(0.0386) 882568.3509(441284.1754)


Epoch 30 - avg_train_loss: 0.0127  avg_val_loss: 0.0201  time: 10s
Epoch 30 - MAE: 0.037558669137453295
Epoch 30 - Save Best Score: 0.0376 Model


EVAL: [6/7] Data 0.224 (0.264) Elapsed 0m 1s (remain 0m 0s) Loss: 0.0374(0.0201) 855135.4093(429087.2791)
Epoch: [31][0/28] Data 0.292 (0.292) Elapsed 0m 0s (remain 0m 9s) Loss: 0.0276(0.0276) 631611.7093(315805.8547)Grad: 0.1500  
Epoch: [31][20/28] Data 0.197 (0.206) Elapsed 0m 5s (remain 0m 1s) Loss: 0.0346(0.0182) 789607.5439(406009.5722)Grad: 0.4107  
Epoch: [31][27/28] Data 0.111 (0.201) Elapsed 0m 7s (remain 0m 0s) Loss: 0.0581(0.0202) 1326537.2587(456692.7211)Grad: 1.0882  
EVAL: [0/7] Data 0.297 (0.297) Elapsed 0m 0s (remain 0m 1s) Loss: 0.0545(0.0545) 1245180.4439(622590.2219)


Epoch 31 - avg_train_loss: 0.0202  avg_val_loss: 0.0275  time: 9s
Epoch 31 - MAE: 0.051313553617736825


EVAL: [6/7] Data 0.206 (0.236) Elapsed 0m 1s (remain 0m 0s) Loss: 0.0467(0.0275) 1066962.5929(586229.2966)
Epoch: [32][0/28] Data 0.310 (0.310) Elapsed 0m 0s (remain 0m 10s) Loss: 0.0392(0.0392) 896755.2733(448377.6367)Grad: 0.1607  
Epoch: [32][20/28] Data 0.199 (0.211) Elapsed 0m 5s (remain 0m 1s) Loss: 0.0567(0.0236) 1295848.2673(526709.0007)Grad: 0.1360  
Epoch: [32][27/28] Data 0.124 (0.206) Elapsed 0m 7s (remain 0m 0s) Loss: 0.0714(0.0260) 1632264.1950(586898.5132)Grad: 1.3574  
EVAL: [0/7] Data 0.302 (0.302) Elapsed 0m 0s (remain 0m 1s) Loss: 0.0725(0.0725) 1656750.4936(828375.2468)


Epoch 32 - avg_train_loss: 0.0260  avg_val_loss: 0.0430  time: 9s
Epoch 32 - MAE: 0.08028584206772472


EVAL: [6/7] Data 0.205 (0.239) Elapsed 0m 1s (remain 0m 0s) Loss: 0.0865(0.0430) 1976032.6856(917221.8865)
Epoch: [33][0/28] Data 0.298 (0.298) Elapsed 0m 0s (remain 0m 9s) Loss: 0.0457(0.0457) 1045317.3900(522658.6950)Grad: 0.2008  
Epoch: [33][20/28] Data 0.220 (0.216) Elapsed 0m 6s (remain 0m 2s) Loss: 0.0555(0.0285) 1267874.5659(636136.3578)Grad: 1.0034  
Epoch: [33][27/28] Data 0.111 (0.208) Elapsed 0m 7s (remain 0m 0s) Loss: 0.0487(0.0282) 1112983.5251(636824.8458)Grad: 0.9696  
EVAL: [0/7] Data 0.288 (0.288) Elapsed 0m 0s (remain 0m 1s) Loss: 0.0637(0.0637) 1456315.4413(728157.7207)


Epoch 33 - avg_train_loss: 0.0282  avg_val_loss: 0.0323  time: 10s
Epoch 33 - MAE: 0.060307110489269546


EVAL: [6/7] Data 0.205 (0.233) Elapsed 0m 1s (remain 0m 0s) Loss: 0.0726(0.0323) 1659322.7840(688975.7684)
Epoch: [34][0/28] Data 0.308 (0.308) Elapsed 0m 0s (remain 0m 10s) Loss: 0.0534(0.0534) 1219974.2955(609987.1477)Grad: 0.2690  
Epoch: [34][20/28] Data 0.183 (0.207) Elapsed 0m 5s (remain 0m 1s) Loss: 0.0447(0.0239) 1021698.4521(534067.2908)Grad: 0.2740  
Epoch: [34][27/28] Data 0.114 (0.201) Elapsed 0m 7s (remain 0m 0s) Loss: 0.0393(0.0238) 897873.5643(536959.2951)Grad: 0.5577  
EVAL: [0/7] Data 0.308 (0.308) Elapsed 0m 0s (remain 0m 1s) Loss: 0.0553(0.0553) 1262780.5455(631390.2728)


Epoch 34 - avg_train_loss: 0.0238  avg_val_loss: 0.0300  time: 9s
Epoch 34 - MAE: 0.05604881541030413


EVAL: [6/7] Data 0.214 (0.246) Elapsed 0m 1s (remain 0m 0s) Loss: 0.0559(0.0300) 1277407.2769(640327.0735)
Epoch: [35][0/28] Data 0.310 (0.310) Elapsed 0m 0s (remain 0m 10s) Loss: 0.0394(0.0394) 900776.3711(450388.1855)Grad: 1.0426  
Epoch: [35][20/28] Data 0.186 (0.206) Elapsed 0m 5s (remain 0m 1s) Loss: 0.0436(0.0216) 995245.7388(481159.6004)Grad: 1.0456  
Epoch: [35][27/28] Data 0.113 (0.205) Elapsed 0m 7s (remain 0m 0s) Loss: 0.0484(0.0211) 1105944.3697(476844.7948)Grad: 0.2591  
EVAL: [0/7] Data 0.292 (0.292) Elapsed 0m 0s (remain 0m 1s) Loss: 0.0431(0.0431) 985508.9129(492754.4564)


Epoch 35 - avg_train_loss: 0.0211  avg_val_loss: 0.0266  time: 9s
Epoch 35 - MAE: 0.04960636110226907


EVAL: [6/7] Data 0.206 (0.235) Elapsed 0m 1s (remain 0m 0s) Loss: 0.0556(0.0266) 1270168.1774(566725.5650)
Epoch: [36][0/28] Data 0.322 (0.322) Elapsed 0m 0s (remain 0m 10s) Loss: 0.0447(0.0447) 1021395.6845(510697.8423)Grad: 0.4869  
Epoch: [36][20/28] Data 0.204 (0.213) Elapsed 0m 5s (remain 0m 1s) Loss: 0.0348(0.0212) 794386.1139(473249.4991)Grad: 0.6627  
Epoch: [36][27/28] Data 0.119 (0.209) Elapsed 0m 7s (remain 0m 0s) Loss: 0.0321(0.0215) 734045.0483(485760.1973)Grad: 0.4514  
EVAL: [0/7] Data 0.396 (0.396) Elapsed 0m 0s (remain 0m 2s) Loss: 0.0465(0.0465) 1062584.8474(531292.4237)


Epoch 36 - avg_train_loss: 0.0215  avg_val_loss: 0.0245  time: 10s
Epoch 36 - MAE: 0.04576499541048081


EVAL: [6/7] Data 0.212 (0.292) Elapsed 0m 2s (remain 0m 0s) Loss: 0.0463(0.0245) 1057984.9417(522840.0563)
Epoch: [37][0/28] Data 0.333 (0.333) Elapsed 0m 0s (remain 0m 11s) Loss: 0.0337(0.0337) 770985.4206(385492.7103)Grad: 0.1082  
Epoch: [37][20/28] Data 0.201 (0.202) Elapsed 0m 5s (remain 0m 1s) Loss: 0.0438(0.0196) 1001604.9651(436587.3488)Grad: 1.4477  
Epoch: [37][27/28] Data 0.118 (0.197) Elapsed 0m 7s (remain 0m 0s) Loss: 0.0488(0.0198) 1115510.8728(446357.4788)Grad: 1.4259  
EVAL: [0/7] Data 0.299 (0.299) Elapsed 0m 0s (remain 0m 1s) Loss: 0.0603(0.0603) 1377116.4734(688558.2367)


Epoch 37 - avg_train_loss: 0.0198  avg_val_loss: 0.0292  time: 9s
Epoch 37 - MAE: 0.05455253270926734


EVAL: [6/7] Data 0.277 (0.250) Elapsed 0m 1s (remain 0m 0s) Loss: 0.0530(0.0292) 1209988.7524(623232.8558)
Epoch: [38][0/28] Data 0.412 (0.412) Elapsed 0m 0s (remain 0m 13s) Loss: 0.0482(0.0482) 1101199.5918(550599.7959)Grad: 1.4225  
Epoch: [38][20/28] Data 0.210 (0.217) Elapsed 0m 5s (remain 0m 1s) Loss: 0.0399(0.0214) 912806.1275(476279.9092)Grad: 0.1460  
Epoch: [38][27/28] Data 0.121 (0.211) Elapsed 0m 7s (remain 0m 0s) Loss: 0.0487(0.0214) 1112259.7599(482846.8831)Grad: 0.3594  
EVAL: [0/7] Data 0.296 (0.296) Elapsed 0m 0s (remain 0m 1s) Loss: 0.0521(0.0521) 1190800.2486(595400.1243)


Epoch 38 - avg_train_loss: 0.0214  avg_val_loss: 0.0283  time: 9s
Epoch 38 - MAE: 0.05284143302222073


EVAL: [6/7] Data 0.207 (0.237) Elapsed 0m 1s (remain 0m 0s) Loss: 0.0518(0.0283) 1183961.6331(603684.4807)
Epoch: [39][0/28] Data 0.317 (0.317) Elapsed 0m 0s (remain 0m 10s) Loss: 0.0393(0.0393) 897349.2324(448674.6162)Grad: 1.0051  
Epoch: [39][20/28] Data 0.202 (0.211) Elapsed 0m 5s (remain 0m 1s) Loss: 0.0600(0.0239) 1370383.6606(532527.8645)Grad: 0.5501  
Epoch: [39][27/28] Data 0.136 (0.206) Elapsed 0m 7s (remain 0m 0s) Loss: 0.0400(0.0234) 914418.8736(527349.0571)Grad: 0.1419  
EVAL: [0/7] Data 0.303 (0.303) Elapsed 0m 0s (remain 0m 1s) Loss: 0.0545(0.0545) 1244426.7168(622213.3584)


Epoch 39 - avg_train_loss: 0.0234  avg_val_loss: 0.0284  time: 9s
Epoch 39 - MAE: 0.052902807575516055


EVAL: [6/7] Data 0.211 (0.241) Elapsed 0m 1s (remain 0m 0s) Loss: 0.0647(0.0284) 1477217.3868(604385.6661)
Epoch: [40][0/28] Data 0.322 (0.322) Elapsed 0m 0s (remain 0m 10s) Loss: 0.0530(0.0530) 1211570.0897(605785.0448)Grad: 0.9197  
Epoch: [40][20/28] Data 0.195 (0.232) Elapsed 0m 6s (remain 0m 2s) Loss: 0.0344(0.0199) 786652.3890(442987.8487)Grad: 0.7341  
Epoch: [40][27/28] Data 0.116 (0.221) Elapsed 0m 8s (remain 0m 0s) Loss: 0.0301(0.0187) 686830.8356(421342.3694)Grad: 0.4512  
EVAL: [0/7] Data 0.300 (0.300) Elapsed 0m 0s (remain 0m 1s) Loss: 0.0411(0.0411) 938461.9586(469230.9793)


Epoch 40 - avg_train_loss: 0.0187  avg_val_loss: 0.0222  time: 10s
Epoch 40 - MAE: 0.04135326459069145


EVAL: [6/7] Data 0.205 (0.238) Elapsed 0m 1s (remain 0m 0s) Loss: 0.0451(0.0222) 1029714.6013(472438.4611)
Epoch: [41][0/28] Data 0.306 (0.306) Elapsed 0m 0s (remain 0m 10s) Loss: 0.0317(0.0317) 724661.7209(362330.8604)Grad: 0.5236  
Epoch: [41][20/28] Data 0.225 (0.217) Elapsed 0m 5s (remain 0m 1s) Loss: 0.0357(0.0153) 816282.5875(340267.5671)Grad: 0.5407  
Epoch: [41][27/28] Data 0.171 (0.222) Elapsed 0m 7s (remain 0m 0s) Loss: 0.0302(0.0151) 690715.1048(339991.4551)Grad: 0.2431  
EVAL: [0/7] Data 0.384 (0.384) Elapsed 0m 0s (remain 0m 2s) Loss: 0.0358(0.0358) 817963.5138(408981.7569)


Epoch 41 - avg_train_loss: 0.0151  avg_val_loss: 0.0199  time: 10s
Epoch 41 - MAE: 0.03705843823880463
Epoch 41 - Save Best Score: 0.0371 Model


EVAL: [6/7] Data 0.269 (0.306) Elapsed 0m 2s (remain 0m 0s) Loss: 0.0379(0.0199) 866986.7567(423372.4059)
Epoch: [42][0/28] Data 0.320 (0.320) Elapsed 0m 0s (remain 0m 10s) Loss: 0.0239(0.0239) 545007.4900(272503.7450)Grad: 0.1690  
Epoch: [42][20/28] Data 0.252 (0.253) Elapsed 0m 6s (remain 0m 2s) Loss: 0.0350(0.0159) 798769.2220(355551.5667)Grad: 0.5220  
Epoch: [42][27/28] Data 0.136 (0.243) Elapsed 0m 8s (remain 0m 0s) Loss: 0.0356(0.0155) 813143.6612(350005.6240)Grad: 0.8489  
EVAL: [0/7] Data 0.313 (0.313) Elapsed 0m 0s (remain 0m 1s) Loss: 0.0390(0.0390) 891499.6975(445749.8487)


Epoch 42 - avg_train_loss: 0.0155  avg_val_loss: 0.0205  time: 10s
Epoch 42 - MAE: 0.03829901022875042


EVAL: [6/7] Data 0.222 (0.249) Elapsed 0m 1s (remain 0m 0s) Loss: 0.0400(0.0205) 914227.5265(437545.2761)
Epoch: [43][0/28] Data 0.309 (0.309) Elapsed 0m 0s (remain 0m 10s) Loss: 0.0329(0.0329) 751350.5538(375675.2769)Grad: 0.7535  
Epoch: [43][20/28] Data 0.248 (0.239) Elapsed 0m 6s (remain 0m 2s) Loss: 0.0306(0.0140) 699897.2114(311502.4340)Grad: 0.4035  
Epoch: [43][27/28] Data 0.126 (0.231) Elapsed 0m 8s (remain 0m 0s) Loss: 0.0333(0.0143) 761342.9915(322576.5646)Grad: 1.4453  
EVAL: [0/7] Data 0.311 (0.311) Elapsed 0m 0s (remain 0m 1s) Loss: 0.0351(0.0351) 801028.5306(400514.2653)


Epoch 43 - avg_train_loss: 0.0143  avg_val_loss: 0.0191  time: 10s
Epoch 43 - MAE: 0.03566147295544387
Epoch 43 - Save Best Score: 0.0357 Model


EVAL: [6/7] Data 0.230 (0.250) Elapsed 0m 1s (remain 0m 0s) Loss: 0.0387(0.0191) 883739.8411(407412.8248)
Epoch: [44][0/28] Data 0.325 (0.325) Elapsed 0m 0s (remain 0m 10s) Loss: 0.0220(0.0220) 503282.7596(251641.3798)Grad: 0.3069  
Epoch: [44][20/28] Data 0.210 (0.219) Elapsed 0m 5s (remain 0m 1s) Loss: 0.0258(0.0140) 589035.3227(312603.8674)Grad: 0.4406  
Epoch: [44][27/28] Data 0.145 (0.220) Elapsed 0m 7s (remain 0m 0s) Loss: 0.0316(0.0140) 722438.0770(316028.7255)Grad: 1.4632  
EVAL: [0/7] Data 0.311 (0.311) Elapsed 0m 0s (remain 0m 1s) Loss: 0.0386(0.0386) 881159.2090(440579.6045)


Epoch 44 - avg_train_loss: 0.0140  avg_val_loss: 0.0209  time: 10s
Epoch 44 - MAE: 0.038915013900011064


EVAL: [6/7] Data 0.218 (0.255) Elapsed 0m 1s (remain 0m 0s) Loss: 0.0428(0.0209) 978673.9575(444582.7447)
Epoch: [45][0/28] Data 0.314 (0.314) Elapsed 0m 0s (remain 0m 10s) Loss: 0.0367(0.0367) 837819.5193(418909.7597)Grad: 1.6502  
Epoch: [45][20/28] Data 0.203 (0.211) Elapsed 0m 5s (remain 0m 1s) Loss: 0.0293(0.0140) 670214.1967(313157.8024)Grad: 0.5171  
Epoch: [45][27/28] Data 0.132 (0.208) Elapsed 0m 7s (remain 0m 0s) Loss: 0.0290(0.0137) 661951.4158(309855.8041)Grad: 0.8164  
EVAL: [0/7] Data 0.415 (0.415) Elapsed 0m 0s (remain 0m 2s) Loss: 0.0356(0.0356) 812804.2925(406402.1462)


Epoch 45 - avg_train_loss: 0.0137  avg_val_loss: 0.0189  time: 9s
Epoch 45 - MAE: 0.03521332841132377
Epoch 45 - Save Best Score: 0.0352 Model


EVAL: [6/7] Data 0.221 (0.262) Elapsed 0m 1s (remain 0m 0s) Loss: 0.0406(0.0189) 928684.7012(402293.0428)
Epoch: [46][0/28] Data 0.305 (0.305) Elapsed 0m 0s (remain 0m 9s) Loss: 0.0252(0.0252) 574837.0368(287418.5184)Grad: 0.0619  
Epoch: [46][20/28] Data 0.216 (0.213) Elapsed 0m 5s (remain 0m 1s) Loss: 0.0296(0.0136) 676374.7983(304093.5965)Grad: 0.3225  
Epoch: [46][27/28] Data 0.146 (0.216) Elapsed 0m 7s (remain 0m 0s) Loss: 0.0416(0.0137) 949627.9293(309999.2833)Grad: 0.2000  
EVAL: [0/7] Data 0.339 (0.339) Elapsed 0m 0s (remain 0m 2s) Loss: 0.0571(0.0571) 1303610.4219(651805.2109)


Epoch 46 - avg_train_loss: 0.0137  avg_val_loss: 0.0275  time: 10s
Epoch 46 - MAE: 0.051401051552549745


EVAL: [6/7] Data 0.227 (0.276) Elapsed 0m 2s (remain 0m 0s) Loss: 0.0675(0.0275) 1541792.7727(587228.9346)
Epoch: [47][0/28] Data 0.312 (0.312) Elapsed 0m 0s (remain 0m 10s) Loss: 0.0317(0.0317) 724684.8732(362342.4366)Grad: 0.1716  
Epoch: [47][20/28] Data 0.221 (0.338) Elapsed 0m 8s (remain 0m 2s) Loss: 0.0291(0.0127) 664905.6769(283988.5384)Grad: 0.8396  
Epoch: [47][27/28] Data 0.186 (0.306) Elapsed 0m 10s (remain 0m 0s) Loss: 0.0273(0.0127) 622647.0813(287029.2778)Grad: 0.7115  
EVAL: [0/7] Data 0.400 (0.400) Elapsed 0m 0s (remain 0m 2s) Loss: 0.0373(0.0373) 852956.1975(426478.0987)


Epoch 47 - avg_train_loss: 0.0127  avg_val_loss: 0.0181  time: 12s
Epoch 47 - MAE: 0.0338152980511011
Epoch 47 - Save Best Score: 0.0338 Model


EVAL: [6/7] Data 0.221 (0.263) Elapsed 0m 1s (remain 0m 0s) Loss: 0.0399(0.0181) 910849.8419(386321.2978)
Epoch: [48][0/28] Data 0.321 (0.321) Elapsed 0m 0s (remain 0m 10s) Loss: 0.0197(0.0197) 450938.4361(225469.2180)Grad: 0.4795  
Epoch: [48][20/28] Data 0.197 (0.211) Elapsed 0m 5s (remain 0m 1s) Loss: 0.0209(0.0119) 477686.1712(265235.1224)Grad: 0.1648  
Epoch: [48][27/28] Data 0.130 (0.207) Elapsed 0m 7s (remain 0m 0s) Loss: 0.0251(0.0118) 573252.5927(267054.4513)Grad: 0.1041  
EVAL: [0/7] Data 0.313 (0.313) Elapsed 0m 0s (remain 0m 1s) Loss: 0.0359(0.0359) 819333.3309(409666.6654)


Epoch 48 - avg_train_loss: 0.0118  avg_val_loss: 0.0186  time: 10s
Epoch 48 - MAE: 0.034671949977783036


EVAL: [6/7] Data 0.242 (0.281) Elapsed 0m 2s (remain 0m 0s) Loss: 0.0418(0.0186) 954389.2202(396108.0824)
Epoch: [49][0/28] Data 0.300 (0.300) Elapsed 0m 0s (remain 0m 9s) Loss: 0.0253(0.0253) 577453.0784(288726.5392)Grad: 0.8468  
Epoch: [49][20/28] Data 0.226 (0.219) Elapsed 0m 5s (remain 0m 1s) Loss: 0.0224(0.0123) 512887.6513(274618.6992)Grad: 0.4135  
Epoch: [49][27/28] Data 0.173 (0.222) Elapsed 0m 7s (remain 0m 0s) Loss: 0.0293(0.0125) 669415.8248(282309.1467)Grad: 1.0219  
EVAL: [0/7] Data 0.388 (0.388) Elapsed 0m 0s (remain 0m 2s) Loss: 0.0425(0.0425) 970784.5502(485392.2751)


Epoch 49 - avg_train_loss: 0.0125  avg_val_loss: 0.0209  time: 10s
Epoch 49 - MAE: 0.039028401152473344


EVAL: [6/7] Data 0.251 (0.274) Elapsed 0m 2s (remain 0m 0s) Loss: 0.0464(0.0209) 1060537.3144(445878.1485)
Epoch: [50][0/28] Data 0.446 (0.446) Elapsed 0m 0s (remain 0m 14s) Loss: 0.0297(0.0297) 677600.8496(338800.4248)Grad: 1.4244  
Epoch: [50][20/28] Data 0.226 (0.243) Elapsed 0m 6s (remain 0m 2s) Loss: 0.0203(0.0116) 463595.2628(258097.8838)Grad: 1.1752  
Epoch: [50][27/28] Data 0.137 (0.236) Elapsed 0m 8s (remain 0m 0s) Loss: 0.0230(0.0114) 525693.6488(257580.1619)Grad: 0.1504  
EVAL: [0/7] Data 0.329 (0.329) Elapsed 0m 0s (remain 0m 2s) Loss: 0.0364(0.0364) 831503.1928(415751.5964)


Epoch 50 - avg_train_loss: 0.0114  avg_val_loss: 0.0176  time: 10s
Epoch 50 - MAE: 0.032917757335786266
Epoch 50 - Save Best Score: 0.0329 Model


EVAL: [6/7] Data 0.222 (0.262) Elapsed 0m 1s (remain 0m 0s) Loss: 0.0416(0.0176) 949808.8068(376067.3804)
Epoch: [51][0/28] Data 0.312 (0.312) Elapsed 0m 0s (remain 0m 10s) Loss: 0.0197(0.0197) 449423.9171(224711.9585)Grad: 0.2658  
Epoch: [51][20/28] Data 0.201 (0.218) Elapsed 0m 5s (remain 0m 1s) Loss: 0.0240(0.0107) 548855.4135(239247.9999)Grad: 0.3748  
Epoch: [51][27/28] Data 0.117 (0.210) Elapsed 0m 7s (remain 0m 0s) Loss: 0.0213(0.0106) 487075.8826(239610.1198)Grad: 0.5268  
EVAL: [0/7] Data 0.292 (0.292) Elapsed 0m 0s (remain 0m 1s) Loss: 0.0301(0.0301) 688722.7714(344361.3857)


Epoch 51 - avg_train_loss: 0.0106  avg_val_loss: 0.0159  time: 9s
Epoch 51 - MAE: 0.02962223466289312
Epoch 51 - Save Best Score: 0.0296 Model


EVAL: [6/7] Data 0.219 (0.248) Elapsed 0m 1s (remain 0m 0s) Loss: 0.0347(0.0159) 793381.0310(338417.8405)
Epoch: [52][0/28] Data 0.302 (0.302) Elapsed 0m 0s (remain 0m 9s) Loss: 0.0225(0.0225) 513536.5120(256768.2560)Grad: 0.8722  
Epoch: [52][20/28] Data 0.208 (0.210) Elapsed 0m 5s (remain 0m 1s) Loss: 0.0165(0.0100) 377555.0407(223203.3310)Grad: 0.0725  
Epoch: [52][27/28] Data 0.121 (0.204) Elapsed 0m 7s (remain 0m 0s) Loss: 0.0146(0.0097) 333819.4635(219165.9071)Grad: 0.2567  
EVAL: [0/7] Data 0.300 (0.300) Elapsed 0m 0s (remain 0m 1s) Loss: 0.0304(0.0304) 695087.6156(347543.8078)


Epoch 52 - avg_train_loss: 0.0097  avg_val_loss: 0.0149  time: 9s
Epoch 52 - MAE: 0.027780069235047902
Epoch 52 - Save Best Score: 0.0278 Model


EVAL: [6/7] Data 0.200 (0.230) Elapsed 0m 1s (remain 0m 0s) Loss: 0.0303(0.0149) 693363.9171(317372.1207)
Epoch: [53][0/28] Data 0.350 (0.350) Elapsed 0m 0s (remain 0m 12s) Loss: 0.0176(0.0176) 402816.7291(201408.3646)Grad: 0.7472  
Epoch: [53][20/28] Data 0.187 (0.356) Elapsed 0m 8s (remain 0m 2s) Loss: 0.0200(0.0094) 457508.2890(208592.3760)Grad: 0.8443  
Epoch: [53][27/28] Data 0.115 (0.312) Elapsed 0m 10s (remain 0m 0s) Loss: 0.0209(0.0093) 477657.7416(210947.1087)Grad: 1.5316  
EVAL: [0/7] Data 0.296 (0.296) Elapsed 0m 0s (remain 0m 1s) Loss: 0.0335(0.0335) 764305.2112(382152.6056)


Epoch 53 - avg_train_loss: 0.0093  avg_val_loss: 0.0156  time: 12s
Epoch 53 - MAE: 0.02912795771860717


EVAL: [6/7] Data 0.200 (0.233) Elapsed 0m 1s (remain 0m 0s) Loss: 0.0360(0.0156) 821614.0043(332770.9883)
Epoch: [54][0/28] Data 0.296 (0.296) Elapsed 0m 0s (remain 0m 9s) Loss: 0.0223(0.0223) 510449.4656(255224.7328)Grad: 1.0516  
Epoch: [54][20/28] Data 0.179 (0.200) Elapsed 0m 5s (remain 0m 1s) Loss: 0.0208(0.0102) 476392.0248(227977.8457)Grad: 0.7180  
Epoch: [54][27/28] Data 0.122 (0.194) Elapsed 0m 7s (remain 0m 0s) Loss: 0.0245(0.0100) 558914.8397(225450.9185)Grad: 1.7850  
EVAL: [0/7] Data 0.300 (0.300) Elapsed 0m 0s (remain 0m 1s) Loss: 0.0320(0.0320) 732114.1281(366057.0641)


Epoch 54 - avg_train_loss: 0.0100  avg_val_loss: 0.0161  time: 9s
Epoch 54 - MAE: 0.030009284408445636


EVAL: [6/7] Data 0.205 (0.238) Elapsed 0m 1s (remain 0m 0s) Loss: 0.0370(0.0161) 844883.1890(342839.6740)
Epoch: [55][0/28] Data 0.297 (0.297) Elapsed 0m 0s (remain 0m 9s) Loss: 0.0216(0.0216) 494032.7282(247016.3641)Grad: 0.5937  
Epoch: [55][20/28] Data 0.251 (0.203) Elapsed 0m 5s (remain 0m 1s) Loss: 0.0152(0.0088) 347335.0752(195784.2617)Grad: 0.4563  
Epoch: [55][27/28] Data 0.119 (0.198) Elapsed 0m 7s (remain 0m 0s) Loss: 0.0225(0.0089) 515221.2261(201193.1750)Grad: 0.9507  
EVAL: [0/7] Data 0.294 (0.294) Elapsed 0m 0s (remain 0m 1s) Loss: 0.0308(0.0308) 704851.9774(352425.9887)


Epoch 55 - avg_train_loss: 0.0089  avg_val_loss: 0.0152  time: 9s
Epoch 55 - MAE: 0.02833032238660919


EVAL: [6/7] Data 0.205 (0.239) Elapsed 0m 1s (remain 0m 0s) Loss: 0.0341(0.0152) 779475.5113(323658.4524)
Epoch: [56][0/28] Data 0.346 (0.346) Elapsed 0m 0s (remain 0m 11s) Loss: 0.0139(0.0139) 318407.4686(159203.7343)Grad: 0.6704  
Epoch: [56][20/28] Data 0.195 (0.209) Elapsed 0m 5s (remain 0m 1s) Loss: 0.0155(0.0081) 354877.0913(181024.6341)Grad: 0.7070  
Epoch: [56][27/28] Data 0.121 (0.207) Elapsed 0m 7s (remain 0m 0s) Loss: 0.0300(0.0083) 685332.8722(186613.1777)Grad: 0.3542  
EVAL: [0/7] Data 0.289 (0.289) Elapsed 0m 0s (remain 0m 1s) Loss: 0.0296(0.0296) 675970.2711(337985.1356)


Epoch 56 - avg_train_loss: 0.0083  avg_val_loss: 0.0140  time: 9s
Epoch 56 - MAE: 0.026126576074558067
Epoch 56 - Save Best Score: 0.0261 Model


EVAL: [6/7] Data 0.204 (0.231) Elapsed 0m 1s (remain 0m 0s) Loss: 0.0273(0.0140) 623586.3248(298481.8624)
Epoch: [57][0/28] Data 0.294 (0.294) Elapsed 0m 0s (remain 0m 9s) Loss: 0.0204(0.0204) 467014.0598(233507.0299)Grad: 0.4978  
Epoch: [57][20/28] Data 0.194 (0.195) Elapsed 0m 5s (remain 0m 1s) Loss: 0.0159(0.0099) 362309.5807(220655.6054)Grad: 0.2080  
Epoch: [57][27/28] Data 0.120 (0.191) Elapsed 0m 7s (remain 0m 0s) Loss: 0.0361(0.0097) 825907.0566(219466.8649)Grad: 0.6687  
EVAL: [0/7] Data 0.303 (0.303) Elapsed 0m 0s (remain 0m 1s) Loss: 0.0339(0.0339) 773603.5901(386801.7951)


Epoch 57 - avg_train_loss: 0.0097  avg_val_loss: 0.0159  time: 9s
Epoch 57 - MAE: 0.029648411799306582


EVAL: [6/7] Data 0.261 (0.255) Elapsed 0m 1s (remain 0m 0s) Loss: 0.0286(0.0159) 652854.6833(338716.8996)
Epoch: [58][0/28] Data 0.292 (0.292) Elapsed 0m 0s (remain 0m 9s) Loss: 0.0165(0.0165) 377906.4112(188953.2056)Grad: 0.5390  
Epoch: [58][20/28] Data 0.209 (0.207) Elapsed 0m 5s (remain 0m 1s) Loss: 0.0159(0.0077) 362423.0441(171655.9166)Grad: 0.1455  
Epoch: [58][27/28] Data 0.119 (0.202) Elapsed 0m 7s (remain 0m 0s) Loss: 0.0137(0.0076) 312332.6880(171171.1925)Grad: 0.2318  
EVAL: [0/7] Data 0.289 (0.289) Elapsed 0m 0s (remain 0m 1s) Loss: 0.0282(0.0282) 645097.0826(322548.5413)


Epoch 58 - avg_train_loss: 0.0076  avg_val_loss: 0.0135  time: 9s
Epoch 58 - MAE: 0.025206119973440296
Epoch 58 - Save Best Score: 0.0252 Model


EVAL: [6/7] Data 0.206 (0.232) Elapsed 0m 1s (remain 0m 0s) Loss: 0.0282(0.0135) 644620.2470(287966.1408)
Epoch: [59][0/28] Data 0.301 (0.301) Elapsed 0m 0s (remain 0m 9s) Loss: 0.0126(0.0126) 288579.8582(144289.9291)Grad: 0.0550  
Epoch: [59][20/28] Data 0.192 (0.209) Elapsed 0m 5s (remain 0m 1s) Loss: 0.0180(0.0077) 412071.4421(172553.8355)Grad: 0.9063  
Epoch: [59][27/28] Data 0.115 (0.213) Elapsed 0m 7s (remain 0m 0s) Loss: 0.0151(0.0077) 344457.2720(173733.9209)Grad: 0.0992  
EVAL: [0/7] Data 0.302 (0.302) Elapsed 0m 0s (remain 0m 1s) Loss: 0.0286(0.0286) 654371.8410(327185.9205)


Epoch 59 - avg_train_loss: 0.0077  avg_val_loss: 0.0135  time: 10s
Epoch 59 - MAE: 0.025243089768226715


EVAL: [6/7] Data 0.213 (0.240) Elapsed 0m 1s (remain 0m 0s) Loss: 0.0276(0.0135) 630286.1541(288388.5124)
Epoch: [60][0/28] Data 0.305 (0.305) Elapsed 0m 0s (remain 0m 10s) Loss: 0.0152(0.0152) 347632.0973(173816.0486)Grad: 0.3943  
Epoch: [60][20/28] Data 0.187 (0.211) Elapsed 0m 5s (remain 0m 1s) Loss: 0.0149(0.0071) 340178.1366(159034.5495)Grad: 0.6613  
Epoch: [60][27/28] Data 0.128 (0.207) Elapsed 0m 7s (remain 0m 0s) Loss: 0.0189(0.0070) 432522.7260(157922.0732)Grad: 0.3832  
EVAL: [0/7] Data 0.307 (0.307) Elapsed 0m 0s (remain 0m 1s) Loss: 0.0267(0.0267) 610684.7390(305342.3695)


Epoch 60 - avg_train_loss: 0.0070  avg_val_loss: 0.0129  time: 9s
Epoch 60 - MAE: 0.02411120759615912
Epoch 60 - Save Best Score: 0.0241 Model


EVAL: [6/7] Data 0.236 (0.254) Elapsed 0m 1s (remain 0m 0s) Loss: 0.0271(0.0129) 619563.6524(275457.3707)
Epoch: [61][0/28] Data 0.321 (0.321) Elapsed 0m 0s (remain 0m 10s) Loss: 0.0158(0.0158) 361027.1807(180513.5904)Grad: 0.3707  
Epoch: [61][20/28] Data 0.226 (0.213) Elapsed 0m 5s (remain 0m 1s) Loss: 0.0100(0.0070) 227451.4649(156548.6521)Grad: 0.4212  
Epoch: [61][27/28] Data 0.118 (0.206) Elapsed 0m 7s (remain 0m 0s) Loss: 0.0163(0.0071) 371968.3952(160284.7319)Grad: 1.1669  
EVAL: [0/7] Data 0.302 (0.302) Elapsed 0m 0s (remain 0m 1s) Loss: 0.0271(0.0271) 619331.8738(309665.9369)


Epoch 61 - avg_train_loss: 0.0071  avg_val_loss: 0.0131  time: 9s
Epoch 61 - MAE: 0.024373042370950865


EVAL: [6/7] Data 0.219 (0.240) Elapsed 0m 1s (remain 0m 0s) Loss: 0.0276(0.0131) 630522.3588(278448.6890)
Epoch: [62][0/28] Data 0.305 (0.305) Elapsed 0m 0s (remain 0m 9s) Loss: 0.0135(0.0135) 307829.2432(153914.6216)Grad: 0.6682  
Epoch: [62][20/28] Data 0.204 (0.203) Elapsed 0m 5s (remain 0m 1s) Loss: 0.0140(0.0068) 319753.5800(150772.2585)Grad: 0.2452  
Epoch: [62][27/28] Data 0.131 (0.206) Elapsed 0m 7s (remain 0m 0s) Loss: 0.0146(0.0066) 333524.8460(149651.3904)Grad: 0.7173  
EVAL: [0/7] Data 0.295 (0.295) Elapsed 0m 0s (remain 0m 1s) Loss: 0.0272(0.0272) 621244.7489(310622.3744)


Epoch 62 - avg_train_loss: 0.0066  avg_val_loss: 0.0126  time: 10s
Epoch 62 - MAE: 0.02359119436666485
Epoch 62 - Save Best Score: 0.0236 Model


EVAL: [6/7] Data 0.233 (0.256) Elapsed 0m 1s (remain 0m 0s) Loss: 0.0262(0.0126) 598105.1150(269516.5090)
Epoch: [63][0/28] Data 0.390 (0.390) Elapsed 0m 0s (remain 0m 13s) Loss: 0.0127(0.0127) 290050.6474(145025.3237)Grad: 0.6370  
Epoch: [63][20/28] Data 0.197 (0.210) Elapsed 0m 5s (remain 0m 1s) Loss: 0.0121(0.0067) 277006.8919(150469.2153)Grad: 0.6068  
Epoch: [63][27/28] Data 0.173 (0.208) Elapsed 0m 7s (remain 0m 0s) Loss: 0.0123(0.0066) 280729.9035(149820.2963)Grad: 0.4222  
EVAL: [0/7] Data 0.334 (0.334) Elapsed 0m 0s (remain 0m 2s) Loss: 0.0271(0.0271) 618485.4949(309242.7475)


Epoch 63 - avg_train_loss: 0.0066  avg_val_loss: 0.0126  time: 10s
Epoch 63 - MAE: 0.023594292770883027


EVAL: [6/7] Data 0.212 (0.243) Elapsed 0m 1s (remain 0m 0s) Loss: 0.0246(0.0126) 562417.5639(269551.8923)
Epoch: [64][0/28] Data 0.302 (0.302) Elapsed 0m 0s (remain 0m 9s) Loss: 0.0113(0.0113) 258895.3325(129447.6663)Grad: 0.1412  
Epoch: [64][20/28] Data 0.193 (0.201) Elapsed 0m 5s (remain 0m 1s) Loss: 0.0115(0.0066) 263253.1178(146469.5331)Grad: 0.2916  
Epoch: [64][27/28] Data 0.117 (0.198) Elapsed 0m 7s (remain 0m 0s) Loss: 0.0136(0.0065) 311551.6378(146670.4453)Grad: 0.5825  
EVAL: [0/7] Data 0.303 (0.303) Elapsed 0m 0s (remain 0m 1s) Loss: 0.0258(0.0258) 590243.1160(295121.5580)


Epoch 64 - avg_train_loss: 0.0065  avg_val_loss: 0.0124  time: 9s
Epoch 64 - MAE: 0.02311640491857903
Epoch 64 - Save Best Score: 0.0231 Model


EVAL: [6/7] Data 0.272 (0.255) Elapsed 0m 1s (remain 0m 0s) Loss: 0.0258(0.0124) 589918.3026(264092.2876)
Epoch: [65][0/28] Data 0.299 (0.299) Elapsed 0m 0s (remain 0m 9s) Loss: 0.0126(0.0126) 287009.7353(143504.8677)Grad: 0.2679  
Epoch: [65][20/28] Data 0.189 (0.197) Elapsed 0m 5s (remain 0m 1s) Loss: 0.0150(0.0067) 342935.8584(149391.5843)Grad: 0.0494  
Epoch: [65][27/28] Data 0.117 (0.195) Elapsed 0m 7s (remain 0m 0s) Loss: 0.0130(0.0067) 297198.2229(151393.3034)Grad: 0.6892  
EVAL: [0/7] Data 0.287 (0.287) Elapsed 0m 0s (remain 0m 1s) Loss: 0.0263(0.0263) 601175.2654(300587.6327)


Epoch 65 - avg_train_loss: 0.0067  avg_val_loss: 0.0124  time: 9s
Epoch 65 - MAE: 0.023099920824256038
Epoch 65 - Save Best Score: 0.0231 Model


EVAL: [6/7] Data 0.204 (0.226) Elapsed 0m 1s (remain 0m 0s) Loss: 0.0247(0.0124) 565240.9549(263903.9752)
Epoch: [66][0/28] Data 0.299 (0.299) Elapsed 0m 0s (remain 0m 9s) Loss: 0.0123(0.0123) 280020.7149(140010.3574)Grad: 0.2321  
Epoch: [66][20/28] Data 0.190 (0.206) Elapsed 0m 5s (remain 0m 1s) Loss: 0.0105(0.0061) 239256.3800(135162.3147)Grad: 0.0586  
Epoch: [66][27/28] Data 0.115 (0.201) Elapsed 0m 7s (remain 0m 0s) Loss: 0.0135(0.0061) 308905.5493(137836.9210)Grad: 0.2483  
EVAL: [0/7] Data 0.289 (0.289) Elapsed 0m 0s (remain 0m 1s) Loss: 0.0261(0.0261) 596932.6034(298466.3017)


Epoch 66 - avg_train_loss: 0.0061  avg_val_loss: 0.0125  time: 9s
Epoch 66 - MAE: 0.023335500671079853


EVAL: [6/7] Data 0.200 (0.228) Elapsed 0m 1s (remain 0m 0s) Loss: 0.0247(0.0125) 565425.1520(266595.3362)
Epoch: [67][0/28] Data 0.302 (0.302) Elapsed 0m 0s (remain 0m 9s) Loss: 0.0127(0.0127) 290747.8982(145373.9491)Grad: 0.1535  
Epoch: [67][20/28] Data 0.185 (0.200) Elapsed 0m 5s (remain 0m 1s) Loss: 0.0115(0.0063) 263647.1114(140901.7064)Grad: 0.3343  
Epoch: [67][27/28] Data 0.122 (0.197) Elapsed 0m 7s (remain 0m 0s) Loss: 0.0149(0.0063) 339764.9699(141272.9386)Grad: 0.3651  
EVAL: [0/7] Data 0.379 (0.379) Elapsed 0m 0s (remain 0m 2s) Loss: 0.0261(0.0261) 596759.8973(298379.9487)


Epoch 67 - avg_train_loss: 0.0063  avg_val_loss: 0.0124  time: 10s
Epoch 67 - MAE: 0.0231178981826274


EVAL: [6/7] Data 0.221 (0.416) Elapsed 0m 3s (remain 0m 0s) Loss: 0.0251(0.0124) 574064.5836(264109.3581)
Epoch: [68][0/28] Data 0.307 (0.307) Elapsed 0m 0s (remain 0m 10s) Loss: 0.0138(0.0138) 314376.5822(157188.2911)Grad: 0.1379  
Epoch: [68][20/28] Data 0.192 (0.203) Elapsed 0m 5s (remain 0m 1s) Loss: 0.0109(0.0061) 249756.1258(136179.7313)Grad: 0.1553  
Epoch: [68][27/28] Data 0.113 (0.198) Elapsed 0m 7s (remain 0m 0s) Loss: 0.0109(0.0060) 249671.0921(135854.3322)Grad: 0.0652  
EVAL: [0/7] Data 0.289 (0.289) Elapsed 0m 0s (remain 0m 1s) Loss: 0.0256(0.0256) 585023.7157(292511.8578)


Epoch 68 - avg_train_loss: 0.0060  avg_val_loss: 0.0122  time: 9s
Epoch 68 - MAE: 0.022848053294596483
Epoch 68 - Save Best Score: 0.0228 Model


EVAL: [6/7] Data 0.202 (0.230) Elapsed 0m 1s (remain 0m 0s) Loss: 0.0246(0.0122) 561338.0872(261026.5193)
Epoch: [69][0/28] Data 0.295 (0.295) Elapsed 0m 0s (remain 0m 9s) Loss: 0.0106(0.0106) 243216.0858(121608.0429)Grad: 0.6868  
Epoch: [69][20/28] Data 0.198 (0.203) Elapsed 0m 5s (remain 0m 1s) Loss: 0.0143(0.0060) 325794.9515(133875.5843)Grad: 0.8573  
Epoch: [69][27/28] Data 0.160 (0.203) Elapsed 0m 7s (remain 0m 0s) Loss: 0.0203(0.0060) 463878.1552(134625.2185)Grad: 0.2529  
EVAL: [0/7] Data 0.358 (0.358) Elapsed 0m 0s (remain 0m 2s) Loss: 0.0259(0.0259) 592800.6809(296400.3405)


Epoch 69 - avg_train_loss: 0.0060  avg_val_loss: 0.0123  time: 9s
Epoch 69 - MAE: 0.022913287010606142


EVAL: [6/7] Data 0.210 (0.268) Elapsed 0m 2s (remain 0m 0s) Loss: 0.0248(0.0123) 566860.5105(261771.7798)
Epoch: [70][0/28] Data 0.290 (0.290) Elapsed 0m 0s (remain 0m 9s) Loss: 0.0101(0.0101) 230031.9268(115015.9634)Grad: 0.3329  
Epoch: [70][20/28] Data 0.199 (0.196) Elapsed 0m 5s (remain 0m 1s) Loss: 0.0139(0.0057) 316781.2737(126768.5000)Grad: 1.0822  
Epoch: [70][27/28] Data 0.117 (0.194) Elapsed 0m 7s (remain 0m 0s) Loss: 0.0112(0.0058) 256164.2104(130792.7343)Grad: 0.6270  
EVAL: [0/7] Data 0.298 (0.298) Elapsed 0m 0s (remain 0m 1s) Loss: 0.0259(0.0259) 592559.7947(296279.8974)


Epoch 70 - avg_train_loss: 0.0058  avg_val_loss: 0.0123  time: 9s
Epoch 70 - MAE: 0.02290625420856128


EVAL: [6/7] Data 0.208 (0.267) Elapsed 0m 2s (remain 0m 0s) Loss: 0.0247(0.0123) 564865.4107(261691.4325)
Epoch: [71][0/28] Data 0.296 (0.296) Elapsed 0m 0s (remain 0m 9s) Loss: 0.0099(0.0099) 225103.4625(112551.7313)Grad: 0.2356  
Epoch: [71][20/28] Data 0.204 (0.203) Elapsed 0m 5s (remain 0m 1s) Loss: 0.0179(0.0085) 409694.1162(189108.0833)Grad: 0.2636  
Epoch: [71][27/28] Data 0.118 (0.199) Elapsed 0m 7s (remain 0m 0s) Loss: 0.0229(0.0094) 523715.9558(211650.2887)Grad: 0.6472  
EVAL: [0/7] Data 0.299 (0.299) Elapsed 0m 0s (remain 0m 1s) Loss: 0.0359(0.0359) 820994.0840(410497.0420)


Epoch 71 - avg_train_loss: 0.0094  avg_val_loss: 0.0181  time: 9s
Epoch 71 - MAE: 0.03376985400948201


EVAL: [6/7] Data 0.209 (0.234) Elapsed 0m 1s (remain 0m 0s) Loss: 0.0376(0.0181) 859507.2817(385802.1146)
Epoch: [72][0/28] Data 0.318 (0.318) Elapsed 0m 0s (remain 0m 12s) Loss: 0.0267(0.0267) 611157.0632(305578.5316)Grad: 1.7796  
Epoch: [72][20/28] Data 0.195 (0.215) Elapsed 0m 5s (remain 0m 1s) Loss: 0.0222(0.0113) 508220.0762(251936.4249)Grad: 0.3725  
Epoch: [72][27/28] Data 0.124 (0.207) Elapsed 0m 7s (remain 0m 0s) Loss: 0.0210(0.0116) 480861.1880(261005.2440)Grad: 0.4405  
EVAL: [0/7] Data 0.304 (0.304) Elapsed 0m 0s (remain 0m 1s) Loss: 0.0352(0.0352) 804777.1630(402388.5815)


Epoch 72 - avg_train_loss: 0.0116  avg_val_loss: 0.0188  time: 9s
Epoch 72 - MAE: 0.035124823987801417


EVAL: [6/7] Data 0.211 (0.241) Elapsed 0m 1s (remain 0m 0s) Loss: 0.0336(0.0188) 767755.4171(401281.9125)
Epoch: [73][0/28] Data 0.306 (0.306) Elapsed 0m 0s (remain 0m 9s) Loss: 0.0240(0.0240) 549483.1222(274741.5611)Grad: 0.3956  
Epoch: [73][20/28] Data 0.194 (0.209) Elapsed 0m 5s (remain 0m 1s) Loss: 0.0352(0.0125) 804549.4702(278194.7654)Grad: 0.3609  
Epoch: [73][27/28] Data 0.121 (0.203) Elapsed 0m 7s (remain 0m 0s) Loss: 0.0227(0.0127) 517808.7954(285674.0839)Grad: 0.3992  
EVAL: [0/7] Data 0.288 (0.288) Elapsed 0m 0s (remain 0m 1s) Loss: 0.0511(0.0511) 1166779.5499(583389.7750)


Epoch 73 - avg_train_loss: 0.0127  avg_val_loss: 0.0230  time: 9s
Epoch 73 - MAE: 0.04286497223102994


EVAL: [6/7] Data 0.222 (0.237) Elapsed 0m 1s (remain 0m 0s) Loss: 0.0470(0.0230) 1074400.4449(489708.8653)
Epoch: [74][0/28] Data 0.301 (0.301) Elapsed 0m 0s (remain 0m 9s) Loss: 0.0324(0.0324) 740169.4319(370084.7160)Grad: 0.4035  
Epoch: [74][20/28] Data 0.217 (0.214) Elapsed 0m 5s (remain 0m 1s) Loss: 0.0500(0.0227) 1142059.0895(505565.0624)Grad: 0.2100  
Epoch: [74][27/28] Data 0.133 (0.208) Elapsed 0m 7s (remain 0m 0s) Loss: 0.0365(0.0222) 833953.9337(501427.5517)Grad: 0.5008  
EVAL: [0/7] Data 0.303 (0.303) Elapsed 0m 0s (remain 0m 1s) Loss: 0.0476(0.0476) 1086669.1298(543334.5649)


Epoch 74 - avg_train_loss: 0.0222  avg_val_loss: 0.0282  time: 9s
Epoch 74 - MAE: 0.052536387776993196


EVAL: [6/7] Data 0.204 (0.240) Elapsed 0m 1s (remain 0m 0s) Loss: 0.0487(0.0282) 1113336.4277(600199.5376)
Epoch: [75][0/28] Data 0.300 (0.300) Elapsed 0m 0s (remain 0m 9s) Loss: 0.0381(0.0381) 869742.8187(434871.4093)Grad: 0.6057  
Epoch: [75][20/28] Data 0.249 (0.202) Elapsed 0m 5s (remain 0m 1s) Loss: 0.0397(0.0187) 906416.2584(417356.9980)Grad: 0.7670  
Epoch: [75][27/28] Data 0.114 (0.199) Elapsed 0m 7s (remain 0m 0s) Loss: 0.0448(0.0186) 1024717.8717(420667.4156)Grad: 0.4672  
EVAL: [0/7] Data 0.314 (0.314) Elapsed 0m 0s (remain 0m 1s) Loss: 0.0525(0.0525) 1198859.0423(599429.5211)


Epoch 75 - avg_train_loss: 0.0186  avg_val_loss: 0.0260  time: 9s
Epoch 75 - MAE: 0.04842311339744859


EVAL: [6/7] Data 0.207 (0.240) Elapsed 0m 1s (remain 0m 0s) Loss: 0.0530(0.0260) 1211050.1840(553207.6061)
Epoch: [76][0/28] Data 0.304 (0.304) Elapsed 0m 0s (remain 0m 10s) Loss: 0.0340(0.0340) 777536.1643(388768.0822)Grad: 0.6299  
Epoch: [76][20/28] Data 0.186 (0.194) Elapsed 0m 5s (remain 0m 1s) Loss: 0.0263(0.0178) 602011.1321(398073.9080)Grad: 0.3917  
Epoch: [76][27/28] Data 0.108 (0.192) Elapsed 0m 7s (remain 0m 0s) Loss: 0.0549(0.0173) 1254756.3101(389487.4858)Grad: 0.6238  
EVAL: [0/7] Data 0.412 (0.412) Elapsed 0m 0s (remain 0m 2s) Loss: 0.0428(0.0428) 979066.1849(489533.0925)


Epoch 76 - avg_train_loss: 0.0173  avg_val_loss: 0.0203  time: 9s
Epoch 76 - MAE: 0.03783039720186175


EVAL: [6/7] Data 0.210 (0.268) Elapsed 0m 2s (remain 0m 0s) Loss: 0.0391(0.0203) 892357.5248(432191.6248)
Epoch: [77][0/28] Data 0.473 (0.473) Elapsed 0m 0s (remain 0m 14s) Loss: 0.0315(0.0315) 720253.0772(360126.5386)Grad: 0.6406  
Epoch: [77][20/28] Data 0.191 (0.203) Elapsed 0m 5s (remain 0m 1s) Loss: 0.0293(0.0152) 669352.1559(338351.5639)Grad: 0.7901  
Epoch: [77][27/28] Data 0.111 (0.196) Elapsed 0m 7s (remain 0m 0s) Loss: 0.0275(0.0156) 627532.0499(351132.3421)Grad: 0.4171  
EVAL: [0/7] Data 0.286 (0.286) Elapsed 0m 0s (remain 0m 1s) Loss: 0.0407(0.0407) 929075.0561(464537.5280)


Epoch 77 - avg_train_loss: 0.0156  avg_val_loss: 0.0223  time: 9s
Epoch 77 - MAE: 0.04162133666557299


EVAL: [6/7] Data 0.199 (0.223) Elapsed 0m 1s (remain 0m 0s) Loss: 0.0381(0.0223) 870034.3506(475501.0038)
Epoch: [78][0/28] Data 0.412 (0.412) Elapsed 0m 0s (remain 0m 13s) Loss: 0.0362(0.0362) 827498.0123(413749.0062)Grad: 0.5467  
Epoch: [78][20/28] Data 0.189 (0.199) Elapsed 0m 5s (remain 0m 1s) Loss: 0.0302(0.0157) 690268.0608(350117.3740)Grad: 0.9581  
Epoch: [78][27/28] Data 0.113 (0.194) Elapsed 0m 7s (remain 0m 0s) Loss: 0.0340(0.0163) 775848.8541(368039.9757)Grad: 0.5150  
EVAL: [0/7] Data 0.279 (0.279) Elapsed 0m 0s (remain 0m 1s) Loss: 0.0702(0.0702) 1603356.4849(801678.2424)


Epoch 78 - avg_train_loss: 0.0163  avg_val_loss: 0.0332  time: 9s
Epoch 78 - MAE: 0.06197893310752098


EVAL: [6/7] Data 0.197 (0.225) Elapsed 0m 1s (remain 0m 0s) Loss: 0.0551(0.0332) 1258827.6279(708075.4578)
Epoch: [79][0/28] Data 0.299 (0.299) Elapsed 0m 0s (remain 0m 9s) Loss: 0.0441(0.0441) 1008637.3962(504318.6981)Grad: 0.8329  
Epoch: [79][20/28] Data 0.186 (0.197) Elapsed 0m 5s (remain 0m 1s) Loss: 0.0236(0.0189) 538583.8287(420967.3742)Grad: 0.2341  
Epoch: [79][27/28] Data 0.111 (0.192) Elapsed 0m 7s (remain 0m 0s) Loss: 0.0426(0.0201) 973551.7628(454564.3571)Grad: 0.4091  
EVAL: [0/7] Data 0.288 (0.288) Elapsed 0m 0s (remain 0m 1s) Loss: 0.0576(0.0576) 1315341.8369(657670.9184)


Epoch 79 - avg_train_loss: 0.0201  avg_val_loss: 0.0359  time: 9s
Epoch 79 - MAE: 0.06705768998048212


EVAL: [6/7] Data 0.210 (0.231) Elapsed 0m 1s (remain 0m 0s) Loss: 0.0817(0.0359) 1867095.5885(766097.4785)
Epoch: [80][0/28] Data 0.302 (0.302) Elapsed 0m 0s (remain 0m 9s) Loss: 0.0700(0.0700) 1598965.5459(799482.7730)Grad: 1.0525  
Epoch: [80][20/28] Data 0.190 (0.198) Elapsed 0m 5s (remain 0m 1s) Loss: 0.0359(0.0206) 819328.9898(458804.5461)Grad: 0.3870  
Epoch: [80][27/28] Data 0.106 (0.202) Elapsed 0m 7s (remain 0m 0s) Loss: 0.0711(0.0210) 1624078.8296(473130.1611)Grad: 0.2992  
EVAL: [0/7] Data 0.286 (0.286) Elapsed 0m 0s (remain 0m 1s) Loss: 0.0587(0.0587) 1342335.9059(671167.9529)


Epoch 80 - avg_train_loss: 0.0210  avg_val_loss: 0.0256  time: 9s
Epoch 80 - MAE: 0.047794325401930744


EVAL: [6/7] Data 0.198 (0.226) Elapsed 0m 1s (remain 0m 0s) Loss: 0.0498(0.0256) 1138732.4761(546024.0353)
Epoch: [81][0/28] Data 0.295 (0.295) Elapsed 0m 0s (remain 0m 9s) Loss: 0.0342(0.0342) 781189.3786(390594.6893)Grad: 0.8785  
Epoch: [81][20/28] Data 0.182 (0.188) Elapsed 0m 5s (remain 0m 1s) Loss: 0.0272(0.0168) 621229.5977(375652.8876)Grad: 0.3017  
Epoch: [81][27/28] Data 0.149 (0.190) Elapsed 0m 6s (remain 0m 0s) Loss: 0.0336(0.0168) 766624.6988(378674.7446)Grad: 0.2151  
EVAL: [0/7] Data 0.285 (0.285) Elapsed 0m 0s (remain 0m 1s) Loss: 0.0510(0.0510) 1164605.7006(582302.8503)


Epoch 81 - avg_train_loss: 0.0168  avg_val_loss: 0.0228  time: 9s
Epoch 81 - MAE: 0.04259930379218744


EVAL: [6/7] Data 0.193 (0.229) Elapsed 0m 1s (remain 0m 0s) Loss: 0.0441(0.0228) 1007741.1802(486673.7636)
Epoch: [82][0/28] Data 0.305 (0.305) Elapsed 0m 0s (remain 0m 9s) Loss: 0.0334(0.0334) 763091.1616(381545.5808)Grad: 1.1808  
Epoch: [82][20/28] Data 0.185 (0.195) Elapsed 0m 5s (remain 0m 1s) Loss: 0.0365(0.0171) 834447.1120(380722.4525)Grad: 0.2365  
Epoch: [82][27/28] Data 0.115 (0.190) Elapsed 0m 6s (remain 0m 0s) Loss: 0.0316(0.0178) 722900.6127(400787.6332)Grad: 0.6017  
EVAL: [0/7] Data 0.292 (0.292) Elapsed 0m 0s (remain 0m 1s) Loss: 0.0864(0.0864) 1975034.4122(987517.2061)


Epoch 82 - avg_train_loss: 0.0178  avg_val_loss: 0.0515  time: 9s
Epoch 82 - MAE: 0.09605276094146678


EVAL: [6/7] Data 0.269 (0.242) Elapsed 0m 1s (remain 0m 0s) Loss: 0.0907(0.0515) 2072851.2474(1097350.2623)
Epoch: [83][0/28] Data 0.339 (0.339) Elapsed 0m 0s (remain 0m 10s) Loss: 0.0477(0.0477) 1090198.0706(545099.0353)Grad: 0.1674  
Epoch: [83][20/28] Data 0.192 (0.195) Elapsed 0m 5s (remain 0m 1s) Loss: 0.0296(0.0187) 675213.7777(417369.3808)Grad: 0.5224  
Epoch: [83][27/28] Data 0.112 (0.191) Elapsed 0m 6s (remain 0m 0s) Loss: 0.0270(0.0178) 617056.5629(400985.0863)Grad: 0.6358  
EVAL: [0/7] Data 0.286 (0.286) Elapsed 0m 0s (remain 0m 1s) Loss: 0.0392(0.0392) 895704.6518(447852.3259)


Epoch 83 - avg_train_loss: 0.0178  avg_val_loss: 0.0209  time: 9s
Epoch 83 - MAE: 0.03905419270243545


EVAL: [6/7] Data 0.203 (0.226) Elapsed 0m 1s (remain 0m 0s) Loss: 0.0342(0.0209) 780301.6745(446172.8166)
Epoch: [84][0/28] Data 0.291 (0.291) Elapsed 0m 0s (remain 0m 9s) Loss: 0.0286(0.0286) 652953.7616(326476.8808)Grad: 0.5128  
Epoch: [84][20/28] Data 0.191 (0.211) Elapsed 0m 5s (remain 0m 1s) Loss: 0.0266(0.0136) 607914.1642(304050.5091)Grad: 0.3808  
Epoch: [84][27/28] Data 0.109 (0.202) Elapsed 0m 7s (remain 0m 0s) Loss: 0.0254(0.0134) 581118.1667(302766.2160)Grad: 1.0705  
EVAL: [0/7] Data 0.291 (0.291) Elapsed 0m 0s (remain 0m 1s) Loss: 0.0428(0.0428) 977698.3257(488849.1628)


Epoch 84 - avg_train_loss: 0.0134  avg_val_loss: 0.0217  time: 9s
Epoch 84 - MAE: 0.04039707019959757


EVAL: [6/7] Data 0.197 (0.225) Elapsed 0m 1s (remain 0m 0s) Loss: 0.0465(0.0217) 1063026.4438(461514.4413)
Epoch: [85][0/28] Data 0.279 (0.279) Elapsed 0m 0s (remain 0m 9s) Loss: 0.0278(0.0278) 635402.6458(317701.3229)Grad: 0.6663  
Epoch: [85][20/28] Data 0.187 (0.197) Elapsed 0m 5s (remain 0m 1s) Loss: 0.0259(0.0143) 591566.2879(318437.7232)Grad: 0.3259  
Epoch: [85][27/28] Data 0.131 (0.192) Elapsed 0m 7s (remain 0m 0s) Loss: 0.0342(0.0140) 780783.0214(315726.6962)Grad: 0.6485  
EVAL: [0/7] Data 0.307 (0.307) Elapsed 0m 0s (remain 0m 1s) Loss: 0.0384(0.0384) 877969.7220(438984.8610)


Epoch 85 - avg_train_loss: 0.0140  avg_val_loss: 0.0205  time: 9s
Epoch 85 - MAE: 0.03821787321559568


EVAL: [6/7] Data 0.198 (0.236) Elapsed 0m 1s (remain 0m 0s) Loss: 0.0364(0.0205) 831655.7258(436618.3146)
Epoch: [86][0/28] Data 0.294 (0.294) Elapsed 0m 0s (remain 0m 9s) Loss: 0.0227(0.0227) 518965.4749(259482.7375)Grad: 0.2263  
Epoch: [86][20/28] Data 0.187 (0.191) Elapsed 0m 5s (remain 0m 1s) Loss: 0.0217(0.0124) 494804.5004(276651.2381)Grad: 0.3311  
Epoch: [86][27/28] Data 0.110 (0.193) Elapsed 0m 7s (remain 0m 0s) Loss: 0.0266(0.0121) 608841.8316(273868.4282)Grad: 0.4890  
EVAL: [0/7] Data 0.285 (0.285) Elapsed 0m 0s (remain 0m 1s) Loss: 0.0349(0.0349) 797295.1344(398647.5672)


Epoch 86 - avg_train_loss: 0.0121  avg_val_loss: 0.0175  time: 9s
Epoch 86 - MAE: 0.0326673656077911


EVAL: [6/7] Data 0.203 (0.226) Elapsed 0m 1s (remain 0m 0s) Loss: 0.0310(0.0175) 708244.7707(373206.7697)
Epoch: [87][0/28] Data 0.290 (0.290) Elapsed 0m 0s (remain 0m 9s) Loss: 0.0223(0.0223) 508905.5380(254452.7690)Grad: 0.7890  
Epoch: [87][20/28] Data 0.186 (0.193) Elapsed 0m 5s (remain 0m 1s) Loss: 0.0223(0.0115) 508940.0111(256480.4166)Grad: 0.6535  
Epoch: [87][27/28] Data 0.118 (0.192) Elapsed 0m 7s (remain 0m 0s) Loss: 0.0210(0.0113) 480505.0509(253885.2026)Grad: 0.7510  
EVAL: [0/7] Data 0.286 (0.286) Elapsed 0m 0s (remain 0m 1s) Loss: 0.0324(0.0324) 740286.9810(370143.4905)


Epoch 87 - avg_train_loss: 0.0113  avg_val_loss: 0.0172  time: 9s
Epoch 87 - MAE: 0.0320455844603677


EVAL: [6/7] Data 0.205 (0.246) Elapsed 0m 1s (remain 0m 0s) Loss: 0.0326(0.0172) 744230.1099(366103.2863)
Epoch: [88][0/28] Data 0.286 (0.286) Elapsed 0m 0s (remain 0m 10s) Loss: 0.0167(0.0167) 381873.5436(190936.7718)Grad: 0.9528  
Epoch: [88][20/28] Data 0.193 (0.198) Elapsed 0m 5s (remain 0m 1s) Loss: 0.0202(0.0100) 461594.1621(223628.2441)Grad: 1.0260  
Epoch: [88][27/28] Data 0.116 (0.194) Elapsed 0m 7s (remain 0m 0s) Loss: 0.0194(0.0101) 444362.4972(229035.5479)Grad: 0.8291  
EVAL: [0/7] Data 0.297 (0.297) Elapsed 0m 0s (remain 0m 1s) Loss: 0.0292(0.0292) 668092.2273(334046.1136)


Epoch 88 - avg_train_loss: 0.0101  avg_val_loss: 0.0166  time: 9s
Epoch 88 - MAE: 0.031065129202215287


EVAL: [6/7] Data 0.210 (0.236) Elapsed 0m 1s (remain 0m 0s) Loss: 0.0339(0.0166) 775638.1851(354902.1087)


In [13]:
res_valid_df.to_pickle('res_valid_df.pkl')