In [5]:
import os
import argparse
from torch.utils.tensorboard import SummaryWriter
import time
import torch
from GEvaluator import Evaluator
from Mid import GINGraphPooling
from Module import load_data,train,evaluate,test,prepartion,continue_train
print('torch version:',torch.__version__)

torch version: 2.0.1+cu118


参数输入

In [6]:
#参数输入
class MyNamespace(argparse.Namespace):
    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.batch_size = 20
        self.device=0
        self.drop_ratio=0.1
        self.early_stop=30
        self.early_stop_open = True
        self.emb_dim=256
        self.epochs=200
        self.graph_pooling='sum'
        self.num_layers=3
        self.n_head=3
        self.num_workers=5
        self.num_tasks=1
        self.save_test=True
        self.task_name='GINGraph_crystal_test_no_attention'
        self.weight_decay=0.5e-05
        self.learning_rate=0.0001
        self.data_type='crystal'
        self.dataset_pt = './PTs/crystal_norm'
        self.dataset_split=[0.8,0.19,0.01]
        self.begin=0
        self.evaluate_epoch=1
        self.continue_train=False
        self.checkpoint_path='./saves/GINGraph-con-v100_/checkpoint.pt'
        self.job_level='node' #graph,node
        self.attention=False #是否启用Multi-head self-attention层



数据载入

In [7]:
def main(args):
    prepartion(args)
    nn_params = {
        'num_layers': args.num_layers,
        'emb_dim': args.emb_dim,
        'n_head':args.n_head,
        'drop_ratio': args.drop_ratio,
        'graph_pooling': args.graph_pooling,
        'num_tasks':args.num_tasks,
        'data_type':args.data_type,
        'job_level':args.job_level,
        'attention':args.attention,

    }

    # automatic dataloading and splitting
    train_loader,valid_loader,test_loader=load_data(args)

    # automatic evaluator. takes dataset name as input
    evaluator = Evaluator()
    criterion_fn = torch.nn.MSELoss()

    device = args.device

    model = GINGraphPooling(**nn_params).to(device)
    optimizer =  torch.optim.Adam(model.parameters(), lr=args.learning_rate, weight_decay=args.weight_decay)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.9)
    if args.continue_train:
        continue_train(args,model,optimizer)

    num_params = sum(p.numel() for p in model.parameters())
    print('train data:', len(train_loader), 'valid data:', len(valid_loader), file=args.output_file, flush=True)
    print(f'#Params: {num_params}', file=args.output_file, flush=True)
    print(model, file=args.output_file, flush=True)


    writer = SummaryWriter(log_dir=args.save_dir)

    not_improved = 0
    eva=1
    best_valid_mae = 9999
    valid_mae=10000

    for epoch in range(1, args.epochs + 1):

        print('=====epoch:', epoch,time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) )

        print(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()),"=====Epoch {}".format(epoch), file=args.output_file, flush=True)
        print('Training...', file=args.output_file, flush=True)
        train_mae,maxP,minN,avgP,avgN = train(model, device, train_loader, optimizer, criterion_fn,epoch,args.epochs)
        print(train_mae,maxP,minN,avgP,avgN)
        print('Evaluating...', file=args.output_file, flush=True)
        if epoch==eva:
            valid_mae = evaluate(model, device, valid_loader, evaluator)
            eva += args.evaluate_epoch

        print({'Train': train_mae, 'Validation': valid_mae}, file=args.output_file, flush=True)

        writer.add_scalar('valid/mae', valid_mae, epoch)
        writer.add_scalar('train/mae', train_mae, epoch)
        writer.add_scalar('train/maxP', maxP, epoch)
        writer.add_scalar('train/minN', minN, epoch)
        writer.add_scalar('train/avgP', avgP, epoch)
        writer.add_scalar('train/avgN', avgN, epoch)
        print('valid_mae:',valid_mae,'best_valid_mae:',best_valid_mae)


        if valid_mae < best_valid_mae:
            print('valid_mae:',valid_mae,'Saving checkpoint...')
            best_valid_mae = valid_mae
            if args.save_test:
                print('Saving checkpoint...', file=args.output_file, flush=True)
                checkpoint = {
                    'epoch': epoch, 'model_state_dict': model.state_dict(), 'optimizer_state_dict': optimizer.state_dict(),
                    'scheduler_state_dict': scheduler.state_dict(), 'best_val_mae': best_valid_mae, 'num_params': num_params
                }
                torch.save(checkpoint, os.path.join(args.save_dir, 'checkpoint.pt'))
                print('Predicting on test data...', file=args.output_file, flush=True)
                y_pred = test(model, device, test_loader)
                print('Saving test submission file...', file=args.output_file, flush=True)
                evaluator.save_test_submission({'y_pred': y_pred}, args.save_dir)

            not_improved = 0
        else:
            not_improved += 1
            if not_improved == args.early_stop:
                print(f"Have not improved for {not_improved} epoches.", file=args.output_file, flush=True)
                break

        scheduler.step()
        print(f'Best validation MAE so far: {best_valid_mae}', file=args.output_file, flush=True)

    # writer.add_graph(model,train_loader)
    writer.close()
    args.output_file.close()

In [None]:
if __name__ == '__main__':
    # args=p_args()
    args=MyNamespace()
    main(args)
    print('finish')

data loading in dir: ./PTs/crystal_norm


100%|██████████| 1/1 [00:00<00:00,  2.28it/s]


train data: 800 valid data: 190
=====epoch: 1 2023-08-05 13:42:20
on training:


Epoch 1/200: 100%|██████████| 40/40 [00:14<00:00,  2.80it/s, loss=0.16567]


0.2260716523975134 tensor(4.2936, device='cuda:0', grad_fn=<UnbindBackward0>) tensor(-6.5602, device='cuda:0', grad_fn=<UnbindBackward0>) tensor(0.1808, device='cuda:0', grad_fn=<DivBackward0>) tensor(-0.4115, device='cuda:0', grad_fn=<DivBackward0>)


Evaluating:: 100%|██████████| 10/10 [00:10<00:00,  1.02s/it]


Evaluate finish
valid_mae: 0.228460893034935 Saving checkpoint...


Testing:: 100%|██████████| 1/1 [00:02<00:00,  2.88s/it]


Test finish
=====epoch: 2 2023-08-05 13:42:48
on training:


Epoch 2/200: 100%|██████████| 40/40 [00:13<00:00,  2.97it/s, loss=0.11963]


0.14256334416568278 tensor(2.2894, device='cuda:0', grad_fn=<UnbindBackward0>) tensor(-2.5813, device='cuda:0', grad_fn=<UnbindBackward0>) tensor(0.1464, device='cuda:0', grad_fn=<DivBackward0>) tensor(-0.3317, device='cuda:0', grad_fn=<DivBackward0>)


Evaluating:: 100%|██████████| 10/10 [00:10<00:00,  1.00s/it]


Evaluate finish
=====epoch: 3 2023-08-05 13:43:11
on training:


Epoch 3/200: 100%|██████████| 40/40 [00:12<00:00,  3.20it/s, loss=0.09671]


0.10540296882390976 tensor(2.0500, device='cuda:0', grad_fn=<UnbindBackward0>) tensor(-2.9285, device='cuda:0', grad_fn=<UnbindBackward0>) tensor(0.1592, device='cuda:0', grad_fn=<DivBackward0>) tensor(-0.2770, device='cuda:0', grad_fn=<DivBackward0>)


Evaluating:: 100%|██████████| 10/10 [00:09<00:00,  1.01it/s]


Evaluate finish
valid_mae: 0.19044414162635803 Saving checkpoint...


Testing:: 100%|██████████| 1/1 [00:02<00:00,  2.82s/it]


Test finish
=====epoch: 4 2023-08-05 13:43:36
on training:


Epoch 4/200: 100%|██████████| 40/40 [00:12<00:00,  3.14it/s, loss=0.07832]


0.08597379345446825 tensor(2.3466, device='cuda:0', grad_fn=<UnbindBackward0>) tensor(-2.5696, device='cuda:0', grad_fn=<UnbindBackward0>) tensor(0.1560, device='cuda:0', grad_fn=<DivBackward0>) tensor(-0.2460, device='cuda:0', grad_fn=<DivBackward0>)


Evaluating:: 100%|██████████| 10/10 [00:09<00:00,  1.02it/s]


Evaluate finish
valid_mae: 0.16238510608673096 Saving checkpoint...


Testing:: 100%|██████████| 1/1 [00:02<00:00,  2.67s/it]


Test finish
=====epoch: 5 2023-08-05 13:44:02
on training:


Epoch 5/200: 100%|██████████| 40/40 [00:12<00:00,  3.15it/s, loss=0.06764]


0.07435891088098287 tensor(2.3888, device='cuda:0', grad_fn=<UnbindBackward0>) tensor(-2.2581, device='cuda:0', grad_fn=<UnbindBackward0>) tensor(0.1507, device='cuda:0', grad_fn=<DivBackward0>) tensor(-0.2249, device='cuda:0', grad_fn=<DivBackward0>)


Evaluating:: 100%|██████████| 10/10 [00:09<00:00,  1.04it/s]


Evaluate finish
valid_mae: 0.16171042621135712 Saving checkpoint...


Testing:: 100%|██████████| 1/1 [00:02<00:00,  2.66s/it]


Test finish
=====epoch: 6 2023-08-05 13:44:27
on training:


Epoch 6/200: 100%|██████████| 40/40 [00:12<00:00,  3.28it/s, loss=0.05967]


0.06455541551113128 tensor(2.2052, device='cuda:0', grad_fn=<UnbindBackward0>) tensor(-2.6148, device='cuda:0', grad_fn=<UnbindBackward0>) tensor(0.1445, device='cuda:0', grad_fn=<DivBackward0>) tensor(-0.2056, device='cuda:0', grad_fn=<DivBackward0>)


Evaluating:: 100%|██████████| 10/10 [00:09<00:00,  1.03it/s]


Evaluate finish
valid_mae: 0.12719008326530457 Saving checkpoint...


Testing:: 100%|██████████| 1/1 [00:02<00:00,  2.78s/it]


Test finish
=====epoch: 7 2023-08-05 13:44:51
on training:


Epoch 7/200: 100%|██████████| 40/40 [00:11<00:00,  3.38it/s, loss=0.05516]


0.05715201776474714 tensor(2.2182, device='cuda:0', grad_fn=<UnbindBackward0>) tensor(-2.6157, device='cuda:0', grad_fn=<UnbindBackward0>) tensor(0.1406, device='cuda:0', grad_fn=<DivBackward0>) tensor(-0.1909, device='cuda:0', grad_fn=<DivBackward0>)


Evaluating:: 100%|██████████| 10/10 [00:09<00:00,  1.03it/s]


Evaluate finish
valid_mae: 0.09488746523857117 Saving checkpoint...


Testing:: 100%|██████████| 1/1 [00:03<00:00,  3.25s/it]


Test finish
=====epoch: 8 2023-08-05 13:45:16
on training:


Epoch 8/200: 100%|██████████| 40/40 [00:14<00:00,  2.70it/s, loss=0.05117]


0.052150980941951273 tensor(2.1539, device='cuda:0', grad_fn=<UnbindBackward0>) tensor(-2.3851, device='cuda:0', grad_fn=<UnbindBackward0>) tensor(0.1370, device='cuda:0', grad_fn=<DivBackward0>) tensor(-0.1783, device='cuda:0', grad_fn=<DivBackward0>)


Evaluating:: 100%|██████████| 10/10 [00:10<00:00,  1.02s/it]


Evaluate finish
=====epoch: 9 2023-08-05 13:45:41
on training:


Epoch 9/200: 100%|██████████| 40/40 [00:12<00:00,  3.27it/s, loss=0.05176]


0.04817119501531124 tensor(1.9221, device='cuda:0', grad_fn=<UnbindBackward0>) tensor(-2.3090, device='cuda:0', grad_fn=<UnbindBackward0>) tensor(0.1346, device='cuda:0', grad_fn=<DivBackward0>) tensor(-0.1700, device='cuda:0', grad_fn=<DivBackward0>)


Evaluating:: 100%|██████████| 10/10 [00:09<00:00,  1.02it/s]


Evaluate finish
valid_mae: 0.09206335246562958 Saving checkpoint...


Testing:: 100%|██████████| 1/1 [00:02<00:00,  2.71s/it]


Test finish
=====epoch: 10 2023-08-05 13:46:06
on training:


Epoch 10/200: 100%|██████████| 40/40 [00:13<00:00,  2.96it/s, loss=0.04367]


0.044010060653090474 tensor(1.7486, device='cuda:0', grad_fn=<UnbindBackward0>) tensor(-2.4674, device='cuda:0', grad_fn=<UnbindBackward0>) tensor(0.1308, device='cuda:0', grad_fn=<DivBackward0>) tensor(-0.1610, device='cuda:0', grad_fn=<DivBackward0>)


Evaluating:: 100%|██████████| 10/10 [00:09<00:00,  1.03it/s]


Evaluate finish
valid_mae: 0.07456839829683304 Saving checkpoint...


Testing:: 100%|██████████| 1/1 [00:02<00:00,  2.71s/it]


Test finish
=====epoch: 11 2023-08-05 13:46:32
on training:


Epoch 11/200: 100%|██████████| 40/40 [00:12<00:00,  3.21it/s, loss=0.03902]


0.041567635256797075 tensor(1.8360, device='cuda:0', grad_fn=<UnbindBackward0>) tensor(-1.8654, device='cuda:0', grad_fn=<UnbindBackward0>) tensor(0.1294, device='cuda:0', grad_fn=<DivBackward0>) tensor(-0.1545, device='cuda:0', grad_fn=<DivBackward0>)


Evaluating:: 100%|██████████| 10/10 [00:09<00:00,  1.02it/s]


Evaluate finish
valid_mae: 0.07165073603391647 Saving checkpoint...


Testing:: 100%|██████████| 1/1 [00:02<00:00,  2.69s/it]


Test finish
=====epoch: 12 2023-08-05 13:46:57
on training:


Epoch 12/200: 100%|██████████| 40/40 [00:11<00:00,  3.39it/s, loss=0.04030]


0.04015544345602393 tensor(1.8221, device='cuda:0', grad_fn=<UnbindBackward0>) tensor(-2.5666, device='cuda:0', grad_fn=<UnbindBackward0>) tensor(0.1282, device='cuda:0', grad_fn=<DivBackward0>) tensor(-0.1497, device='cuda:0', grad_fn=<DivBackward0>)


Evaluating:: 100%|██████████| 10/10 [00:09<00:00,  1.04it/s]


Evaluate finish
valid_mae: 0.06699863076210022 Saving checkpoint...


Testing:: 100%|██████████| 1/1 [00:02<00:00,  2.59s/it]


Test finish
=====epoch: 13 2023-08-05 13:47:21
on training:


Epoch 13/200: 100%|██████████| 40/40 [00:11<00:00,  3.34it/s, loss=0.03567]


0.03786835065111518 tensor(1.9058, device='cuda:0', grad_fn=<UnbindBackward0>) tensor(-2.1691, device='cuda:0', grad_fn=<UnbindBackward0>) tensor(0.1264, device='cuda:0', grad_fn=<DivBackward0>) tensor(-0.1442, device='cuda:0', grad_fn=<DivBackward0>)


Evaluating:: 100%|██████████| 10/10 [00:09<00:00,  1.04it/s]


Evaluate finish
valid_mae: 0.0664721354842186 Saving checkpoint...


Testing:: 100%|██████████| 1/1 [00:02<00:00,  2.64s/it]


Test finish
=====epoch: 14 2023-08-05 13:47:45
on training:


Epoch 14/200: 100%|██████████| 40/40 [00:12<00:00,  3.14it/s, loss=0.03488]


0.03651565695181489 tensor(1.8983, device='cuda:0', grad_fn=<UnbindBackward0>) tensor(-2.5350, device='cuda:0', grad_fn=<UnbindBackward0>) tensor(0.1245, device='cuda:0', grad_fn=<DivBackward0>) tensor(-0.1411, device='cuda:0', grad_fn=<DivBackward0>)


Evaluating:: 100%|██████████| 10/10 [00:09<00:00,  1.05it/s]


Evaluate finish
valid_mae: 0.06122638285160065 Saving checkpoint...


Testing:: 100%|██████████| 1/1 [00:02<00:00,  2.63s/it]


Test finish
=====epoch: 15 2023-08-05 13:48:10
on training:


Epoch 15/200: 100%|██████████| 40/40 [00:12<00:00,  3.09it/s, loss=0.03231]


0.034992736857384445 tensor(1.7964, device='cuda:0', grad_fn=<UnbindBackward0>) tensor(-2.0299, device='cuda:0', grad_fn=<UnbindBackward0>) tensor(0.1228, device='cuda:0', grad_fn=<DivBackward0>) tensor(-0.1373, device='cuda:0', grad_fn=<DivBackward0>)


Evaluating:: 100%|██████████| 10/10 [00:09<00:00,  1.04it/s]


Evaluate finish
valid_mae: 0.054541055113077164 Saving checkpoint...


Testing:: 100%|██████████| 1/1 [00:02<00:00,  2.72s/it]


Test finish
=====epoch: 16 2023-08-05 13:48:35
on training:


Epoch 16/200: 100%|██████████| 40/40 [00:12<00:00,  3.22it/s, loss=0.03285]


0.033200731966644526 tensor(1.9529, device='cuda:0', grad_fn=<UnbindBackward0>) tensor(-1.7714, device='cuda:0', grad_fn=<UnbindBackward0>) tensor(0.1204, device='cuda:0', grad_fn=<DivBackward0>) tensor(-0.1333, device='cuda:0', grad_fn=<DivBackward0>)


Evaluating:: 100%|██████████| 10/10 [00:09<00:00,  1.06it/s]


Evaluate finish
valid_mae: 0.05166524648666382 Saving checkpoint...


Testing:: 100%|██████████| 1/1 [00:02<00:00,  2.64s/it]


Test finish
=====epoch: 17 2023-08-05 13:49:00
on training:


Epoch 17/200: 100%|██████████| 40/40 [00:11<00:00,  3.34it/s, loss=0.03376]


0.03216349096037448 tensor(1.6664, device='cuda:0', grad_fn=<UnbindBackward0>) tensor(-1.7727, device='cuda:0', grad_fn=<UnbindBackward0>) tensor(0.1184, device='cuda:0', grad_fn=<DivBackward0>) tensor(-0.1310, device='cuda:0', grad_fn=<DivBackward0>)


Evaluating:: 100%|██████████| 10/10 [00:09<00:00,  1.05it/s]


Evaluate finish
=====epoch: 18 2023-08-05 13:49:21
on training:


Epoch 18/200: 100%|██████████| 40/40 [00:11<00:00,  3.44it/s, loss=0.03117]


0.030673379823565484 tensor(1.9032, device='cuda:0', grad_fn=<UnbindBackward0>) tensor(-1.9096, device='cuda:0', grad_fn=<UnbindBackward0>) tensor(0.1166, device='cuda:0', grad_fn=<DivBackward0>) tensor(-0.1276, device='cuda:0', grad_fn=<DivBackward0>)


Evaluating:: 100%|██████████| 10/10 [00:09<00:00,  1.02it/s]


Evaluate finish
=====epoch: 19 2023-08-05 13:49:43
on training:


Epoch 19/200: 100%|██████████| 40/40 [00:12<00:00,  3.11it/s, loss=0.02981]


0.029551068134605885 tensor(1.5523, device='cuda:0', grad_fn=<UnbindBackward0>) tensor(-2.0077, device='cuda:0', grad_fn=<UnbindBackward0>) tensor(0.1149, device='cuda:0', grad_fn=<DivBackward0>) tensor(-0.1252, device='cuda:0', grad_fn=<DivBackward0>)


Evaluating:: 100%|██████████| 10/10 [00:10<00:00,  1.00s/it]


Evaluate finish
=====epoch: 20 2023-08-05 13:50:06
on training:


Epoch 20/200: 100%|██████████| 40/40 [00:13<00:00,  2.87it/s, loss=0.02726]


0.028427558857947587 tensor(1.6019, device='cuda:0', grad_fn=<UnbindBackward0>) tensor(-1.7561, device='cuda:0', grad_fn=<UnbindBackward0>) tensor(0.1126, device='cuda:0', grad_fn=<DivBackward0>) tensor(-0.1229, device='cuda:0', grad_fn=<DivBackward0>)


Evaluating:: 100%|██████████| 10/10 [00:10<00:00,  1.01s/it]


Evaluate finish
=====epoch: 21 2023-08-05 13:50:30
on training:


Epoch 21/200: 100%|██████████| 40/40 [00:13<00:00,  3.05it/s, loss=0.02637]


0.027401902666315436 tensor(2.0111, device='cuda:0', grad_fn=<UnbindBackward0>) tensor(-1.7046, device='cuda:0', grad_fn=<UnbindBackward0>) tensor(0.1103, device='cuda:0', grad_fn=<DivBackward0>) tensor(-0.1210, device='cuda:0', grad_fn=<DivBackward0>)


Evaluating:: 100%|██████████| 10/10 [00:09<00:00,  1.01it/s]


Evaluate finish
valid_mae: 0.050921276211738586 Saving checkpoint...


Testing:: 100%|██████████| 1/1 [00:02<00:00,  2.57s/it]


Test finish
=====epoch: 22 2023-08-05 13:50:55
on training:


Epoch 22/200: 100%|██████████| 40/40 [00:12<00:00,  3.17it/s, loss=0.02469]


0.025856968387961386 tensor(1.6293, device='cuda:0', grad_fn=<UnbindBackward0>) tensor(-1.6405, device='cuda:0', grad_fn=<UnbindBackward0>) tensor(0.1075, device='cuda:0', grad_fn=<DivBackward0>) tensor(-0.1181, device='cuda:0', grad_fn=<DivBackward0>)


Evaluating:: 100%|██████████| 10/10 [00:09<00:00,  1.01it/s]


Evaluate finish
valid_mae: 0.050394412130117416 Saving checkpoint...


Testing:: 100%|██████████| 1/1 [00:02<00:00,  2.67s/it]


Test finish
=====epoch: 23 2023-08-05 13:51:21
on training:


Epoch 23/200: 100%|██████████| 40/40 [00:13<00:00,  3.06it/s, loss=0.02608]


0.025242955842986702 tensor(1.7143, device='cuda:0', grad_fn=<UnbindBackward0>) tensor(-1.7937, device='cuda:0', grad_fn=<UnbindBackward0>) tensor(0.1058, device='cuda:0', grad_fn=<DivBackward0>) tensor(-0.1158, device='cuda:0', grad_fn=<DivBackward0>)


Evaluating:: 100%|██████████| 10/10 [00:10<00:00,  1.00s/it]


Evaluate finish
valid_mae: 0.04632137343287468 Saving checkpoint...


Testing:: 100%|██████████| 1/1 [00:02<00:00,  2.65s/it]


Test finish
=====epoch: 24 2023-08-05 13:51:46
on training:


Epoch 24/200: 100%|██████████| 40/40 [00:11<00:00,  3.38it/s, loss=0.02516]


0.024296278646215797 tensor(1.3513, device='cuda:0', grad_fn=<UnbindBackward0>) tensor(-1.4463, device='cuda:0', grad_fn=<UnbindBackward0>) tensor(0.1037, device='cuda:0', grad_fn=<DivBackward0>) tensor(-0.1135, device='cuda:0', grad_fn=<DivBackward0>)


Evaluating:: 100%|██████████| 10/10 [00:10<00:00,  1.01s/it]


Evaluate finish
=====epoch: 25 2023-08-05 13:52:08
on training:


Epoch 25/200: 100%|██████████| 40/40 [00:13<00:00,  3.05it/s, loss=0.02326]


0.0234178327023983 tensor(1.4367, device='cuda:0', grad_fn=<UnbindBackward0>) tensor(-1.4080, device='cuda:0', grad_fn=<UnbindBackward0>) tensor(0.1025, device='cuda:0', grad_fn=<DivBackward0>) tensor(-0.1114, device='cuda:0', grad_fn=<DivBackward0>)


Evaluating:: 100%|██████████| 10/10 [00:10<00:00,  1.00s/it]


Evaluate finish
=====epoch: 26 2023-08-05 13:52:32
on training:


Epoch 26/200: 100%|██████████| 40/40 [00:13<00:00,  3.07it/s, loss=0.02284]


0.022637580428272484 tensor(1.4601, device='cuda:0', grad_fn=<UnbindBackward0>) tensor(-1.7811, device='cuda:0', grad_fn=<UnbindBackward0>) tensor(0.1002, device='cuda:0', grad_fn=<DivBackward0>) tensor(-0.1100, device='cuda:0', grad_fn=<DivBackward0>)


Evaluating:: 100%|██████████| 10/10 [00:10<00:00,  1.02s/it]


Evaluate finish
valid_mae: 0.04523822292685509 Saving checkpoint...


Testing:: 100%|██████████| 1/1 [00:02<00:00,  2.70s/it]


Test finish
=====epoch: 27 2023-08-05 13:52:57
on training:


Epoch 27/200: 100%|██████████| 40/40 [00:13<00:00,  3.00it/s, loss=0.02149]


0.021678366465494037 tensor(1.4324, device='cuda:0', grad_fn=<UnbindBackward0>) tensor(-1.6614, device='cuda:0', grad_fn=<UnbindBackward0>) tensor(0.0982, device='cuda:0', grad_fn=<DivBackward0>) tensor(-0.1077, device='cuda:0', grad_fn=<DivBackward0>)


Evaluating:: 100%|██████████| 10/10 [00:10<00:00,  1.00s/it]


Evaluate finish
valid_mae: 0.04353000968694687 Saving checkpoint...


Testing:: 100%|██████████| 1/1 [00:02<00:00,  2.75s/it]


Test finish
=====epoch: 28 2023-08-05 13:53:24
on training:


Epoch 28/200: 100%|██████████| 40/40 [00:14<00:00,  2.79it/s, loss=0.02003]


0.02096710670739412 tensor(1.5743, device='cuda:0', grad_fn=<UnbindBackward0>) tensor(-1.2916, device='cuda:0', grad_fn=<UnbindBackward0>) tensor(0.0964, device='cuda:0', grad_fn=<DivBackward0>) tensor(-0.1061, device='cuda:0', grad_fn=<DivBackward0>)


Evaluating:: 100%|██████████| 10/10 [00:10<00:00,  1.01s/it]


Evaluate finish
valid_mae: 0.04325243830680847 Saving checkpoint...


Testing:: 100%|██████████| 1/1 [00:02<00:00,  2.82s/it]


Test finish
=====epoch: 29 2023-08-05 13:53:51
on training:


Epoch 29/200: 100%|██████████| 40/40 [00:12<00:00,  3.09it/s, loss=0.02029]


0.020337064191699027 tensor(1.5163, device='cuda:0', grad_fn=<UnbindBackward0>) tensor(-1.2857, device='cuda:0', grad_fn=<UnbindBackward0>) tensor(0.0949, device='cuda:0', grad_fn=<DivBackward0>) tensor(-0.1041, device='cuda:0', grad_fn=<DivBackward0>)


Evaluating:: 100%|██████████| 10/10 [00:10<00:00,  1.01s/it]


Evaluate finish
=====epoch: 30 2023-08-05 13:54:14
on training:


Epoch 30/200: 100%|██████████| 40/40 [00:12<00:00,  3.28it/s, loss=0.01884]


0.01959334802813828 tensor(1.3421, device='cuda:0', grad_fn=<UnbindBackward0>) tensor(-1.4811, device='cuda:0', grad_fn=<UnbindBackward0>) tensor(0.0931, device='cuda:0', grad_fn=<DivBackward0>) tensor(-0.1023, device='cuda:0', grad_fn=<DivBackward0>)


Evaluating:: 100%|██████████| 10/10 [00:09<00:00,  1.03it/s]


Evaluate finish
=====epoch: 31 2023-08-05 13:54:36
on training:


Epoch 31/200: 100%|██████████| 40/40 [00:12<00:00,  3.31it/s, loss=0.01805]


0.01880501243285835 tensor(1.3182, device='cuda:0', grad_fn=<UnbindBackward0>) tensor(-1.3306, device='cuda:0', grad_fn=<UnbindBackward0>) tensor(0.0913, device='cuda:0', grad_fn=<DivBackward0>) tensor(-0.1003, device='cuda:0', grad_fn=<DivBackward0>)


Evaluating:: 100%|██████████| 10/10 [00:09<00:00,  1.03it/s]


Evaluate finish
valid_mae: 0.03962957486510277 Saving checkpoint...


Testing:: 100%|██████████| 1/1 [00:02<00:00,  2.65s/it]


Test finish
=====epoch: 32 2023-08-05 13:55:00
on training:


Epoch 32/200: 100%|██████████| 40/40 [00:12<00:00,  3.17it/s, loss=0.01778]


0.018389879493042827 tensor(1.2518, device='cuda:0', grad_fn=<UnbindBackward0>) tensor(-1.3635, device='cuda:0', grad_fn=<UnbindBackward0>) tensor(0.0901, device='cuda:0', grad_fn=<DivBackward0>) tensor(-0.0990, device='cuda:0', grad_fn=<DivBackward0>)


Evaluating:: 100%|██████████| 10/10 [00:09<00:00,  1.04it/s]


Evaluate finish
=====epoch: 33 2023-08-05 13:55:23
on training:


Epoch 33/200:   0%|          | 0/40 [00:00<?, ?it/s]