In [40]:
# (Re)load the autoreload extension and enable mode 2, so that imported
# modules are reloaded before each cell runs and edits to the project's
# .py files take effect without restarting the kernel.
%reload_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the cell output.
%matplotlib inline

In [3]:
cd ..

C:\Projects\python\recommender


In [20]:
from functools import partial
from pathlib import Path

import numpy as np
import pandas as pd
import torch as T
import torch.optim as optim

from utils import get_log_dir

In [38]:
from datasets import TorchMovielen10k, TorchTopcoder, DataBunch
from models import FMLearner, TorchFM, TorchHrmFM, TorchPrmeFM, TorchTransFM
from models.fm_learner import simple_loss, trans_loss

In [5]:
# Runtime and data-loading configuration used by the cells below.
# Use the index of the active CUDA device when a GPU is available; otherwise
# fall back to the CPU instead of raising right here on CUDA-less machines.
# NOTE(review): confirm that db.config_db accepts a torch.device as well as
# an integer device index.
DEVICE = T.cuda.current_device() if T.cuda.is_available() else T.device("cpu")
BATCH = 2000    # passed to db.config_db as batch_size
SHUFFLE = True  # passed to db.config_db as shuffle
WORKERS = 0     # passed to db.config_db as num_workers
# Registrations CSV; the path is relative to the current working directory.
REGS_PATH = Path("./inputs/topcoder/regs.csv")

In [6]:
# Build the Topcoder dataset object from the registrations CSV.
# NOTE(review): the recorded output below repeats every log message several
# times, which looks like duplicate logging handlers — TODO confirm in utils.py.
db = TorchTopcoder(REGS_PATH)
db  # show the object's repr as the cell output

2019-09-25 20:36:44,356 - C:\Projects\python\recommender\utils.py - INFO - Read dataset in inputs\topcoder\regs.csv
2019-09-25 20:36:44,356 - C:\Projects\python\recommender\utils.py - INFO - Read dataset in inputs\topcoder\regs.csv
2019-09-25 20:36:44,356 - C:\Projects\python\recommender\utils.py - INFO - Read dataset in inputs\topcoder\regs.csv
I0925 20:36:44.356832  2240 torch_topcoder.py:47] Read dataset in inputs\topcoder\regs.csv
2019-09-25 20:36:44,363 - C:\Projects\python\recommender\utils.py - INFO - Original regs shape: (610025, 3)
2019-09-25 20:36:44,363 - C:\Projects\python\recommender\utils.py - INFO - Original regs shape: (610025, 3)
2019-09-25 20:36:44,363 - C:\Projects\python\recommender\utils.py - INFO - Original regs shape: (610025, 3)
I0925 20:36:44.363837  2240 torch_topcoder.py:48] Original regs shape: (610025, 3)
2019-09-25 20:36:44,489 - C:\Projects\python\recommender\utils.py - INFO - Original registants size: 60017
2019-09-25 20:36:44,489 - C:\Projects\python\re

<datasets.torch_topcoder.TorchTopcoder at 0x1f26d1eeb38>

In [8]:
# Apply the batching / loading settings defined in the configuration cell.
db.config_db(
    batch_size=BATCH,
    shuffle=SHUFFLE,
    num_workers=WORKERS,
    device=DEVICE,
)

In [10]:
# Model hyper-parameters; all three are passed to the TorchFM constructor.
# NOTE: the same three assignments are repeated in the "Hyper-parameter"
# cell further down — keep the two copies in sync or drop one of them.
feat_dim = db.feat_dim  # passed as feature_dim
NUM_DIM = 124           # passed as num_dim (presumably the latent/embedding size — confirm in models)
INIT_MEAN = 0.1         # passed as init_mean (presumably the mean used for parameter init — confirm in models)

### Create criterion

In [11]:
# Regularization settings. These are bound positionally, in this order, to
# simple_loss (first two) and trans_loss (all three) via functools.partial.
# Presumably they weight the linear, embedding and translation regularization
# terms respectively — confirm against models.fm_learner.
LINEAR_REG = 1
EMB_REG = 1
TRANS_REG = 1

In [12]:
# Pre-bind the first two positional arguments of simple_loss to the
# regularization settings; the resulting callable is what gets passed to
# FMLearner.fit as loss_callback.
simple_loss_callback = partial(simple_loss, LINEAR_REG, EMB_REG)
simple_loss_callback  # show the partial's repr as the cell output

functools.partial(<function simple_loss at 0x000001F26D1EAAE8>, 1, 1)

In [13]:
# Pre-bind the first three positional arguments of trans_loss to the
# regularization settings. This callable is not used by the FM training
# cell shown in this part of the notebook.
trans_loss_callback = partial(trans_loss, LINEAR_REG, EMB_REG, TRANS_REG)
trans_loss_callback  # show the partial's repr as the cell output

functools.partial(<function trans_loss at 0x000001F26D246BF8>, 1, 1, 1)

### Train model

#### Hyper-parameter

In [14]:
# Model hyper-parameters passed to the TorchFM constructor below.
# NOTE: this cell re-assigns exactly the same values as the earlier cell that
# follows db.config_db(...); it is redundant unless the two are meant to diverge.
feat_dim = db.feat_dim  # passed as feature_dim
NUM_DIM = 124           # passed as num_dim
INIT_MEAN = 0.1         # passed as init_mean

#### Train FM Model

In [15]:
# Optimizer and learning-rate schedule settings for the FM run.
LEARNING_RATE = 0.001  # passed to optim.Adam as lr
DECAY_FREQ = 1000      # passed to StepLR as step_size
DECAY_GAMMA = 1        # passed to StepLR as gamma; a factor of 1 leaves the learning rate unchanged

In [16]:
# Instantiate the FM model with the hyper-parameters defined above.
fm_model = TorchFM(feature_dim=feat_dim, num_dim=NUM_DIM, init_mean=INIT_MEAN)
fm_model  # show the module's repr as the cell output

TorchFM()

In [17]:
# Adam over the FM model's parameters, plus a step-wise learning-rate schedule
# that multiplies the rate by DECAY_GAMMA every DECAY_FREQ scheduler steps.
# NOTE: the variable is spelled `schedular` here and where it is handed to
# FMLearner; if the name is corrected, rename both places together.
adam_opt = optim.Adam(fm_model.parameters(), lr=LEARNING_RATE)
schedular = optim.lr_scheduler.StepLR(adam_opt, step_size=DECAY_FREQ, gamma=DECAY_GAMMA)

In [19]:
# Bundle the model, optimizer, LR scheduler and dataset into a learner object.
fm_learner = FMLearner(fm_model, adam_opt, schedular, db)
fm_learner  # show the object's repr as the cell output

<models.fm_learner.FMLearner at 0x1f27495e438>

In [21]:
# Train the FM model for 8 epochs using the simple-loss callback; log_dir is
# taken from get_log_dir('topcoder', 'fm'). In the recorded run each epoch
# took roughly two minutes and the output is a per-step loss/accuracy log.
fm_learner.fit(epoch=8, loss_callback=simple_loss_callback, log_dir=get_log_dir('topcoder', 'fm'))

  0%|                                                                                                                                                                 | 0/8 [00:00<?, ?it/s]

Epoch: 0
Epoch 0 step 0: training loss: 37675.703864504314
Epoch 0 step 1: training accuarcy: 0.325
Epoch 0 step 1: training loss: 36599.736475448546
Epoch 0 step 2: training accuarcy: 0.34750000000000003
Epoch 0 step 2: training loss: 35615.31982925032
Epoch 0 step 3: training accuarcy: 0.35100000000000003
Epoch 0 step 3: training loss: 34615.4817054277
Epoch 0 step 4: training accuarcy: 0.379
Epoch 0 step 4: training loss: 33644.801296626974
Epoch 0 step 5: training accuarcy: 0.395
Epoch 0 step 5: training loss: 32720.449225403172
Epoch 0 step 6: training accuarcy: 0.43
Epoch 0 step 6: training loss: 31828.289722670477
Epoch 0 step 7: training accuarcy: 0.41200000000000003
Epoch 0 step 7: training loss: 30897.08321612749
Epoch 0 step 8: training accuarcy: 0.47200000000000003
Epoch 0 step 8: training loss: 30067.42791356266
Epoch 0 step 9: training accuarcy: 0.47800000000000004
Epoch 0 step 9: training loss: 29184.55817753484
Epoch 0 step 10: training accuarcy: 0.537
Epoch 0 step 10: 

Epoch 0 step 85: training loss: 2653.4429234977392
Epoch 0 step 86: training accuarcy: 0.9635
Epoch 0 step 86: training loss: 2550.409468341607
Epoch 0 step 87: training accuarcy: 0.9715
Epoch 0 step 87: training loss: 2469.2917007705064
Epoch 0 step 88: training accuarcy: 0.9665
Epoch 0 step 88: training loss: 2414.710345401264
Epoch 0 step 89: training accuarcy: 0.9645
Epoch 0 step 89: training loss: 2333.4486915904654
Epoch 0 step 90: training accuarcy: 0.9635
Epoch 0 step 90: training loss: 2267.7874215121824
Epoch 0 step 91: training accuarcy: 0.961
Epoch 0 step 91: training loss: 2183.3832112490873
Epoch 0 step 92: training accuarcy: 0.9685
Epoch 0 step 92: training loss: 2116.704745038552
Epoch 0 step 93: training accuarcy: 0.9715
Epoch 0 step 93: training loss: 2057.723475437113
Epoch 0 step 94: training accuarcy: 0.9715
Epoch 0 step 94: training loss: 1984.0326964775147
Epoch 0 step 95: training accuarcy: 0.9725
Epoch 0 step 95: training loss: 1936.757999925666
Epoch 0 step 96

Epoch 0 step 172: training accuarcy: 0.9945
Epoch 0 step 172: training loss: 425.0257884882394
Epoch 0 step 173: training accuarcy: 0.9945
Epoch 0 step 173: training loss: 421.4626819664196
Epoch 0 step 174: training accuarcy: 0.993
Epoch 0 step 174: training loss: 414.43250600558804
Epoch 0 step 175: training accuarcy: 0.9935
Epoch 0 step 175: training loss: 409.8280806638817
Epoch 0 step 176: training accuarcy: 0.9945
Epoch 0 step 176: training loss: 407.12696780746546
Epoch 0 step 177: training accuarcy: 0.9915
Epoch 0 step 177: training loss: 399.084360623394
Epoch 0 step 178: training accuarcy: 0.997
Epoch 0 step 178: training loss: 394.00021115296954
Epoch 0 step 179: training accuarcy: 0.9945
Epoch 0 step 179: training loss: 403.04572063116314
Epoch 0 step 180: training accuarcy: 0.9965
Epoch 0 step 180: training loss: 398.4237869793317
Epoch 0 step 181: training accuarcy: 0.993
Epoch 0 step 181: training loss: 396.7086957094257
Epoch 0 step 182: training accuarcy: 0.993
Epoch 0

Epoch 0 step 258: training accuarcy: 0.9965
Epoch 0 step 258: training loss: 322.6649725226142
Epoch 0 step 259: training accuarcy: 0.997
Epoch 0 step 259: training loss: 317.7464473078878
Epoch 0 step 260: training accuarcy: 0.999
Epoch 0 step 260: training loss: 321.80251178393627
Epoch 0 step 261: training accuarcy: 0.997
Epoch 0 step 261: training loss: 326.00470199059305
Epoch 0 step 262: training accuarcy: 0.995
Epoch 0 step 262: training loss: 244.2342229596161
Epoch 0 step 263: training accuarcy: 0.9948717948717949
Epoch 0: train loss 4955.651903589199, train accuarcy 0.8019078373908997
Epoch 0: valid loss 1073.815513813322, valid accuarcy 0.9551243185997009


 12%|███████████████████                                                                                                                                     | 1/8 [01:58<13:47, 118.15s/it]

Epoch: 1
Epoch 1 step 263: training loss: 309.1691628978947
Epoch 1 step 264: training accuarcy: 0.998
Epoch 1 step 264: training loss: 315.2735084747016
Epoch 1 step 265: training accuarcy: 0.9975
Epoch 1 step 265: training loss: 319.65269007600955
Epoch 1 step 266: training accuarcy: 0.9975
Epoch 1 step 266: training loss: 312.6047788514495
Epoch 1 step 267: training accuarcy: 0.997
Epoch 1 step 267: training loss: 328.70305530856047
Epoch 1 step 268: training accuarcy: 0.998
Epoch 1 step 268: training loss: 324.4337716981763
Epoch 1 step 269: training accuarcy: 0.9955
Epoch 1 step 269: training loss: 317.2617600904713
Epoch 1 step 270: training accuarcy: 0.998
Epoch 1 step 270: training loss: 309.15623288516286
Epoch 1 step 271: training accuarcy: 0.996
Epoch 1 step 271: training loss: 321.16579564425945
Epoch 1 step 272: training accuarcy: 0.997
Epoch 1 step 272: training loss: 313.3828973496984
Epoch 1 step 273: training accuarcy: 0.9995
Epoch 1 step 273: training loss: 305.616113

Epoch 1 step 349: training loss: 316.5082103115984
Epoch 1 step 350: training accuarcy: 0.9995
Epoch 1 step 350: training loss: 321.30688448752596
Epoch 1 step 351: training accuarcy: 0.9925
Epoch 1 step 351: training loss: 311.8097373960985
Epoch 1 step 352: training accuarcy: 0.996
Epoch 1 step 352: training loss: 318.33969803620175
Epoch 1 step 353: training accuarcy: 0.995
Epoch 1 step 353: training loss: 312.16812255355273
Epoch 1 step 354: training accuarcy: 0.996
Epoch 1 step 354: training loss: 318.0854629421592
Epoch 1 step 355: training accuarcy: 0.996
Epoch 1 step 355: training loss: 304.59511971761333
Epoch 1 step 356: training accuarcy: 0.996
Epoch 1 step 356: training loss: 310.72339122395226
Epoch 1 step 357: training accuarcy: 0.999
Epoch 1 step 357: training loss: 305.9250289022699
Epoch 1 step 358: training accuarcy: 0.998
Epoch 1 step 358: training loss: 320.8448196466692
Epoch 1 step 359: training accuarcy: 0.9965
Epoch 1 step 359: training loss: 309.1239679152412
E

Epoch 1 step 435: training loss: 324.48042684495215
Epoch 1 step 436: training accuarcy: 0.9955
Epoch 1 step 436: training loss: 304.17924590749885
Epoch 1 step 437: training accuarcy: 0.996
Epoch 1 step 437: training loss: 315.96906691799165
Epoch 1 step 438: training accuarcy: 0.997
Epoch 1 step 438: training loss: 316.2006755559568
Epoch 1 step 439: training accuarcy: 0.996
Epoch 1 step 439: training loss: 308.84882102258524
Epoch 1 step 440: training accuarcy: 0.997
Epoch 1 step 440: training loss: 321.23936312305887
Epoch 1 step 441: training accuarcy: 0.9955
Epoch 1 step 441: training loss: 313.2956504290179
Epoch 1 step 442: training accuarcy: 0.9965
Epoch 1 step 442: training loss: 309.08133367924427
Epoch 1 step 443: training accuarcy: 0.9965
Epoch 1 step 443: training loss: 321.3998276253459
Epoch 1 step 444: training accuarcy: 0.996
Epoch 1 step 444: training loss: 314.88458205243865
Epoch 1 step 445: training accuarcy: 0.9965
Epoch 1 step 445: training loss: 323.44293571601

Epoch 1 step 521: training loss: 313.53631303783516
Epoch 1 step 522: training accuarcy: 0.996
Epoch 1 step 522: training loss: 321.8350621190158
Epoch 1 step 523: training accuarcy: 0.996
Epoch 1 step 523: training loss: 309.98878794692473
Epoch 1 step 524: training accuarcy: 0.9955
Epoch 1 step 524: training loss: 330.7124737356661
Epoch 1 step 525: training accuarcy: 0.9955
Epoch 1 step 525: training loss: 230.6820001954395
Epoch 1 step 526: training accuarcy: 0.9987179487179487
Epoch 1: train loss 318.5069418671539, train accuarcy 0.9515917301177979
Epoch 1: valid loss 1041.2307226321534, valid accuarcy 0.9625025391578674


 25%|██████████████████████████████████████                                                                                                                  | 2/8 [03:59<11:54, 119.10s/it]

Epoch: 2
Epoch 2 step 526: training loss: 306.4784648039956
Epoch 2 step 527: training accuarcy: 0.9975
Epoch 2 step 527: training loss: 319.4942709638066
Epoch 2 step 528: training accuarcy: 0.993
Epoch 2 step 528: training loss: 307.6946451425526
Epoch 2 step 529: training accuarcy: 0.999
Epoch 2 step 529: training loss: 299.4949892547625
Epoch 2 step 530: training accuarcy: 0.998
Epoch 2 step 530: training loss: 313.59830348378216
Epoch 2 step 531: training accuarcy: 0.9955
Epoch 2 step 531: training loss: 316.18446705271384
Epoch 2 step 532: training accuarcy: 0.9945
Epoch 2 step 532: training loss: 309.46603645860154
Epoch 2 step 533: training accuarcy: 0.997
Epoch 2 step 533: training loss: 313.3375216390581
Epoch 2 step 534: training accuarcy: 0.9965
Epoch 2 step 534: training loss: 320.2534815752594
Epoch 2 step 535: training accuarcy: 0.997
Epoch 2 step 535: training loss: 315.9165406368588
Epoch 2 step 536: training accuarcy: 0.997
Epoch 2 step 536: training loss: 318.5481019

Epoch 2 step 612: training loss: 335.3512729490448
Epoch 2 step 613: training accuarcy: 0.9945
Epoch 2 step 613: training loss: 328.24863371124377
Epoch 2 step 614: training accuarcy: 0.997
Epoch 2 step 614: training loss: 313.0704500888211
Epoch 2 step 615: training accuarcy: 0.9965
Epoch 2 step 615: training loss: 314.64613084620754
Epoch 2 step 616: training accuarcy: 0.997
Epoch 2 step 616: training loss: 335.1671442164569
Epoch 2 step 617: training accuarcy: 0.996
Epoch 2 step 617: training loss: 324.79082520478624
Epoch 2 step 618: training accuarcy: 0.995
Epoch 2 step 618: training loss: 317.2795695637067
Epoch 2 step 619: training accuarcy: 0.993
Epoch 2 step 619: training loss: 316.524652789863
Epoch 2 step 620: training accuarcy: 0.996
Epoch 2 step 620: training loss: 321.8836638115931
Epoch 2 step 621: training accuarcy: 0.9965
Epoch 2 step 621: training loss: 311.42338813112076
Epoch 2 step 622: training accuarcy: 0.997
Epoch 2 step 622: training loss: 324.5668568387207
Epo

Epoch 2 step 698: training loss: 322.0038442876637
Epoch 2 step 699: training accuarcy: 0.995
Epoch 2 step 699: training loss: 312.0435734269805
Epoch 2 step 700: training accuarcy: 0.997
Epoch 2 step 700: training loss: 318.85613161299625
Epoch 2 step 701: training accuarcy: 0.9945
Epoch 2 step 701: training loss: 341.679649477487
Epoch 2 step 702: training accuarcy: 0.989
Epoch 2 step 702: training loss: 316.8991271748211
Epoch 2 step 703: training accuarcy: 0.994
Epoch 2 step 703: training loss: 321.88526525589646
Epoch 2 step 704: training accuarcy: 0.9965
Epoch 2 step 704: training loss: 328.7915622176679
Epoch 2 step 705: training accuarcy: 0.995
Epoch 2 step 705: training loss: 323.26891578669847
Epoch 2 step 706: training accuarcy: 0.997
Epoch 2 step 706: training loss: 326.861486175447
Epoch 2 step 707: training accuarcy: 0.996
Epoch 2 step 707: training loss: 308.0951456016287
Epoch 2 step 708: training accuarcy: 0.995
Epoch 2 step 708: training loss: 319.57972280273066
Epoch

Epoch 2 step 784: training loss: 319.28621600272703
Epoch 2 step 785: training accuarcy: 0.9935
Epoch 2 step 785: training loss: 315.3963780702619
Epoch 2 step 786: training accuarcy: 0.9975
Epoch 2 step 786: training loss: 321.0749453260568
Epoch 2 step 787: training accuarcy: 0.996
Epoch 2 step 787: training loss: 323.7255606776381
Epoch 2 step 788: training accuarcy: 0.9955
Epoch 2 step 788: training loss: 236.53689298205455
Epoch 2 step 789: training accuarcy: 0.9961538461538462
Epoch 2: train loss 319.33613336497325, train accuarcy 0.952167272567749
Epoch 2: valid loss 1033.96088070751, valid accuarcy 0.9609864354133606


 38%|█████████████████████████████████████████████████████████                                                                                               | 3/8 [06:00<09:59, 119.81s/it]

Epoch: 3
Epoch 3 step 789: training loss: 309.65848292783585
Epoch 3 step 790: training accuarcy: 0.997
Epoch 3 step 790: training loss: 309.9977250349506
Epoch 3 step 791: training accuarcy: 0.995
Epoch 3 step 791: training loss: 324.94450242543166
Epoch 3 step 792: training accuarcy: 0.9975
Epoch 3 step 792: training loss: 300.41720849251215
Epoch 3 step 793: training accuarcy: 0.998
Epoch 3 step 793: training loss: 302.750009583625
Epoch 3 step 794: training accuarcy: 0.9965
Epoch 3 step 794: training loss: 309.11450940913477
Epoch 3 step 795: training accuarcy: 0.995
Epoch 3 step 795: training loss: 322.5879538555329
Epoch 3 step 796: training accuarcy: 0.995
Epoch 3 step 796: training loss: 304.9679905126494
Epoch 3 step 797: training accuarcy: 0.9975
Epoch 3 step 797: training loss: 319.77697752689426
Epoch 3 step 798: training accuarcy: 0.9975
Epoch 3 step 798: training loss: 308.3493692727157
Epoch 3 step 799: training accuarcy: 0.9945
Epoch 3 step 799: training loss: 316.75691

Epoch 3 step 875: training loss: 325.8313165517123
Epoch 3 step 876: training accuarcy: 0.994
Epoch 3 step 876: training loss: 316.68664731604605
Epoch 3 step 877: training accuarcy: 0.9985
Epoch 3 step 877: training loss: 319.8295183403604
Epoch 3 step 878: training accuarcy: 0.996
Epoch 3 step 878: training loss: 313.61119126499807
Epoch 3 step 879: training accuarcy: 0.998
Epoch 3 step 879: training loss: 320.9779839784021
Epoch 3 step 880: training accuarcy: 0.998
Epoch 3 step 880: training loss: 329.09931322127034
Epoch 3 step 881: training accuarcy: 0.9975
Epoch 3 step 881: training loss: 315.49971653257023
Epoch 3 step 882: training accuarcy: 0.9965
Epoch 3 step 882: training loss: 313.2860025150304
Epoch 3 step 883: training accuarcy: 0.9975
Epoch 3 step 883: training loss: 330.23658252738323
Epoch 3 step 884: training accuarcy: 0.9975
Epoch 3 step 884: training loss: 322.04783865337254
Epoch 3 step 885: training accuarcy: 0.998
Epoch 3 step 885: training loss: 310.242671427371

Epoch 3 step 961: training loss: 315.10795976590293
Epoch 3 step 962: training accuarcy: 0.9985
Epoch 3 step 962: training loss: 322.17942949206616
Epoch 3 step 963: training accuarcy: 0.996
Epoch 3 step 963: training loss: 336.12249063814636
Epoch 3 step 964: training accuarcy: 0.9975
Epoch 3 step 964: training loss: 325.95476129312794
Epoch 3 step 965: training accuarcy: 0.999
Epoch 3 step 965: training loss: 331.7735338624691
Epoch 3 step 966: training accuarcy: 0.9975
Epoch 3 step 966: training loss: 321.3858923054422
Epoch 3 step 967: training accuarcy: 0.996
Epoch 3 step 967: training loss: 317.4350513110139
Epoch 3 step 968: training accuarcy: 0.995
Epoch 3 step 968: training loss: 330.81026620412433
Epoch 3 step 969: training accuarcy: 0.997
Epoch 3 step 969: training loss: 328.77890621360024
Epoch 3 step 970: training accuarcy: 0.9955
Epoch 3 step 970: training loss: 326.11887582023985
Epoch 3 step 971: training accuarcy: 0.9975
Epoch 3 step 971: training loss: 321.91049315111

Epoch 3 step 1046: training loss: 319.65152435874745
Epoch 3 step 1047: training accuarcy: 0.996
Epoch 3 step 1047: training loss: 319.87076043147556
Epoch 3 step 1048: training accuarcy: 0.9965
Epoch 3 step 1048: training loss: 319.6107963018667
Epoch 3 step 1049: training accuarcy: 0.9965
Epoch 3 step 1049: training loss: 320.80166476879174
Epoch 3 step 1050: training accuarcy: 0.9975
Epoch 3 step 1050: training loss: 340.15429999419575
Epoch 3 step 1051: training accuarcy: 0.996
Epoch 3 step 1051: training loss: 238.53677211808397
Epoch 3 step 1052: training accuarcy: 0.9987179487179487
Epoch 3: train loss 322.1046866738541, train accuarcy 0.9522033333778381
Epoch 3: valid loss 1027.8526387736672, valid accuarcy 0.9556296467781067


 50%|████████████████████████████████████████████████████████████████████████████                                                                            | 4/8 [08:00<07:59, 119.86s/it]

Epoch: 4
Epoch 4 step 1052: training loss: 319.21165766951145
Epoch 4 step 1053: training accuarcy: 0.9975
Epoch 4 step 1053: training loss: 309.45178519129286
Epoch 4 step 1054: training accuarcy: 0.997
Epoch 4 step 1054: training loss: 309.81004367733107
Epoch 4 step 1055: training accuarcy: 0.996
Epoch 4 step 1055: training loss: 310.5198722611402
Epoch 4 step 1056: training accuarcy: 0.997
Epoch 4 step 1056: training loss: 323.1899221318025
Epoch 4 step 1057: training accuarcy: 0.997
Epoch 4 step 1057: training loss: 318.6667397092086
Epoch 4 step 1058: training accuarcy: 0.9955
Epoch 4 step 1058: training loss: 311.5701856276213
Epoch 4 step 1059: training accuarcy: 0.996
Epoch 4 step 1059: training loss: 317.8293964409372
Epoch 4 step 1060: training accuarcy: 0.999
Epoch 4 step 1060: training loss: 308.6348871294265
Epoch 4 step 1061: training accuarcy: 0.998
Epoch 4 step 1061: training loss: 319.7230498930651
Epoch 4 step 1062: training accuarcy: 0.9955
Epoch 4 step 1062: traini

Epoch 4 step 1136: training loss: 324.5529075077276
Epoch 4 step 1137: training accuarcy: 0.9945
Epoch 4 step 1137: training loss: 316.77602515282
Epoch 4 step 1138: training accuarcy: 0.9975
Epoch 4 step 1138: training loss: 317.26011122298974
Epoch 4 step 1139: training accuarcy: 0.991
Epoch 4 step 1139: training loss: 327.899051025331
Epoch 4 step 1140: training accuarcy: 0.995
Epoch 4 step 1140: training loss: 323.5658737617201
Epoch 4 step 1141: training accuarcy: 0.993
Epoch 4 step 1141: training loss: 317.68139775264194
Epoch 4 step 1142: training accuarcy: 0.996
Epoch 4 step 1142: training loss: 334.4911517491739
Epoch 4 step 1143: training accuarcy: 0.9975
Epoch 4 step 1143: training loss: 318.94899711786655
Epoch 4 step 1144: training accuarcy: 0.9955
Epoch 4 step 1144: training loss: 335.60331938779166
Epoch 4 step 1145: training accuarcy: 0.9955
Epoch 4 step 1145: training loss: 308.6023671127402
Epoch 4 step 1146: training accuarcy: 0.9985
Epoch 4 step 1146: training loss:

Epoch 4 step 1221: training accuarcy: 0.9945
Epoch 4 step 1221: training loss: 334.85419178521283
Epoch 4 step 1222: training accuarcy: 0.996
Epoch 4 step 1222: training loss: 323.01065349691623
Epoch 4 step 1223: training accuarcy: 0.9965
Epoch 4 step 1223: training loss: 321.86714807538544
Epoch 4 step 1224: training accuarcy: 0.9955
Epoch 4 step 1224: training loss: 332.78325092166824
Epoch 4 step 1225: training accuarcy: 0.9965
Epoch 4 step 1225: training loss: 311.8968427532475
Epoch 4 step 1226: training accuarcy: 0.9975
Epoch 4 step 1226: training loss: 333.61774512533475
Epoch 4 step 1227: training accuarcy: 0.996
Epoch 4 step 1227: training loss: 326.6551326529108
Epoch 4 step 1228: training accuarcy: 0.9955
Epoch 4 step 1228: training loss: 317.50267456543395
Epoch 4 step 1229: training accuarcy: 0.9955
Epoch 4 step 1229: training loss: 324.97275796019886
Epoch 4 step 1230: training accuarcy: 0.997
Epoch 4 step 1230: training loss: 324.329392598824
Epoch 4 step 1231: training

Epoch 4 step 1305: training loss: 322.75840928172306
Epoch 4 step 1306: training accuarcy: 0.994
Epoch 4 step 1306: training loss: 324.64641039305707
Epoch 4 step 1307: training accuarcy: 0.9965
Epoch 4 step 1307: training loss: 326.67384095468213
Epoch 4 step 1308: training accuarcy: 0.996
Epoch 4 step 1308: training loss: 329.53041262228925
Epoch 4 step 1309: training accuarcy: 0.9955
Epoch 4 step 1309: training loss: 325.08932070852615
Epoch 4 step 1310: training accuarcy: 0.997
Epoch 4 step 1310: training loss: 316.1974464146633
Epoch 4 step 1311: training accuarcy: 0.997
Epoch 4 step 1311: training loss: 334.3833373807645
Epoch 4 step 1312: training accuarcy: 0.996
Epoch 4 step 1312: training loss: 329.6752810008255
Epoch 4 step 1313: training accuarcy: 0.9965
Epoch 4 step 1313: training loss: 331.7108628456672
Epoch 4 step 1314: training accuarcy: 0.9955
Epoch 4 step 1314: training loss: 229.19482363461373
Epoch 4 step 1315: training accuarcy: 0.9961538461538462
Epoch 4: train lo

 62%|███████████████████████████████████████████████████████████████████████████████████████████████                                                         | 5/8 [09:53<05:53, 117.78s/it]

Epoch: 5
Epoch 5 step 1315: training loss: 300.19705339384643
Epoch 5 step 1316: training accuarcy: 0.998
Epoch 5 step 1316: training loss: 309.2830322771522
Epoch 5 step 1317: training accuarcy: 0.997
Epoch 5 step 1317: training loss: 310.63042680173066
Epoch 5 step 1318: training accuarcy: 0.9975
Epoch 5 step 1318: training loss: 315.26146567146594
Epoch 5 step 1319: training accuarcy: 0.9965
Epoch 5 step 1319: training loss: 319.5794830087975
Epoch 5 step 1320: training accuarcy: 0.996
Epoch 5 step 1320: training loss: 324.05532209742216
Epoch 5 step 1321: training accuarcy: 0.998
Epoch 5 step 1321: training loss: 314.7399272664992
Epoch 5 step 1322: training accuarcy: 0.9955
Epoch 5 step 1322: training loss: 310.401231960229
Epoch 5 step 1323: training accuarcy: 0.998
Epoch 5 step 1323: training loss: 315.6578238625169
Epoch 5 step 1324: training accuarcy: 0.996
Epoch 5 step 1324: training loss: 319.7482102806622
Epoch 5 step 1325: training accuarcy: 0.9995
Epoch 5 step 1325: train

Epoch 5 step 1399: training loss: 333.1297917416314
Epoch 5 step 1400: training accuarcy: 0.9935
Epoch 5 step 1400: training loss: 317.84959706139466
Epoch 5 step 1401: training accuarcy: 0.995
Epoch 5 step 1401: training loss: 333.7308168298234
Epoch 5 step 1402: training accuarcy: 0.994
Epoch 5 step 1402: training loss: 337.80462862497615
Epoch 5 step 1403: training accuarcy: 0.996
Epoch 5 step 1403: training loss: 327.70672903202126
Epoch 5 step 1404: training accuarcy: 0.9965
Epoch 5 step 1404: training loss: 322.03576450288915
Epoch 5 step 1405: training accuarcy: 0.996
Epoch 5 step 1405: training loss: 337.13389299473124
Epoch 5 step 1406: training accuarcy: 0.9965
Epoch 5 step 1406: training loss: 326.72214002075674
Epoch 5 step 1407: training accuarcy: 0.9955
Epoch 5 step 1407: training loss: 331.5398332722882
Epoch 5 step 1408: training accuarcy: 0.9965
Epoch 5 step 1408: training loss: 333.28833341334155
Epoch 5 step 1409: training accuarcy: 0.996
Epoch 5 step 1409: training 

Epoch 5 step 1483: training loss: 330.76137930032814
Epoch 5 step 1484: training accuarcy: 0.993
Epoch 5 step 1484: training loss: 333.51524889084703
Epoch 5 step 1485: training accuarcy: 0.993
Epoch 5 step 1485: training loss: 324.49771735477634
Epoch 5 step 1486: training accuarcy: 0.9965
Epoch 5 step 1486: training loss: 338.13683528146214
Epoch 5 step 1487: training accuarcy: 0.9945
Epoch 5 step 1487: training loss: 323.2803831897743
Epoch 5 step 1488: training accuarcy: 0.997
Epoch 5 step 1488: training loss: 323.26534601577896
Epoch 5 step 1489: training accuarcy: 0.998
Epoch 5 step 1489: training loss: 327.59787807575344
Epoch 5 step 1490: training accuarcy: 0.995
Epoch 5 step 1490: training loss: 330.1775619091519
Epoch 5 step 1491: training accuarcy: 0.9965
Epoch 5 step 1491: training loss: 338.83962460807874
Epoch 5 step 1492: training accuarcy: 0.995
Epoch 5 step 1492: training loss: 319.9235989924775
Epoch 5 step 1493: training accuarcy: 0.9965
Epoch 5 step 1493: training l

Epoch 5 step 1567: training loss: 337.3905831498827
Epoch 5 step 1568: training accuarcy: 0.9955
Epoch 5 step 1568: training loss: 329.86748872902007
Epoch 5 step 1569: training accuarcy: 0.994
Epoch 5 step 1569: training loss: 322.45900910224543
Epoch 5 step 1570: training accuarcy: 0.995
Epoch 5 step 1570: training loss: 335.1481004471458
Epoch 5 step 1571: training accuarcy: 0.997
Epoch 5 step 1571: training loss: 335.2599903073251
Epoch 5 step 1572: training accuarcy: 0.997
Epoch 5 step 1572: training loss: 327.94504995961927
Epoch 5 step 1573: training accuarcy: 0.998
Epoch 5 step 1573: training loss: 323.6124569384159
Epoch 5 step 1574: training accuarcy: 0.9985
Epoch 5 step 1574: training loss: 319.8922358030462
Epoch 5 step 1575: training accuarcy: 0.997
Epoch 5 step 1575: training loss: 330.40251445351544
Epoch 5 step 1576: training accuarcy: 0.9955
Epoch 5 step 1576: training loss: 334.9631085848649
Epoch 5 step 1577: training accuarcy: 0.997
Epoch 5 step 1577: training loss:

 75%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████                                      | 6/8 [11:43<03:50, 115.28s/it]

Epoch: 6
Epoch 6 step 1578: training loss: 316.62284789303715
Epoch 6 step 1579: training accuarcy: 0.9935
Epoch 6 step 1579: training loss: 329.9828304113389
Epoch 6 step 1580: training accuarcy: 0.996
Epoch 6 step 1580: training loss: 323.5796471669129
Epoch 6 step 1581: training accuarcy: 0.998
Epoch 6 step 1581: training loss: 321.6963268416048
Epoch 6 step 1582: training accuarcy: 0.9965
Epoch 6 step 1582: training loss: 313.2548561400763
Epoch 6 step 1583: training accuarcy: 0.994
Epoch 6 step 1583: training loss: 309.1014486234674
Epoch 6 step 1584: training accuarcy: 0.998
Epoch 6 step 1584: training loss: 338.18399636405434
Epoch 6 step 1585: training accuarcy: 0.992
Epoch 6 step 1585: training loss: 311.5317243289815
Epoch 6 step 1586: training accuarcy: 0.996
Epoch 6 step 1586: training loss: 313.8682328264628
Epoch 6 step 1587: training accuarcy: 0.9955
Epoch 6 step 1587: training loss: 319.0030924558677
Epoch 6 step 1588: training accuarcy: 0.9955
Epoch 6 step 1588: traini

Epoch 6 step 1663: training accuarcy: 0.9955
Epoch 6 step 1663: training loss: 316.72572420519026
Epoch 6 step 1664: training accuarcy: 0.9965
Epoch 6 step 1664: training loss: 326.06756223222226
Epoch 6 step 1665: training accuarcy: 0.998
Epoch 6 step 1665: training loss: 317.7778645679872
Epoch 6 step 1666: training accuarcy: 0.9945
Epoch 6 step 1666: training loss: 327.84942508850867
Epoch 6 step 1667: training accuarcy: 0.9965
Epoch 6 step 1667: training loss: 341.0111929779638
Epoch 6 step 1668: training accuarcy: 0.997
Epoch 6 step 1668: training loss: 329.97753278433936
Epoch 6 step 1669: training accuarcy: 0.9965
Epoch 6 step 1669: training loss: 319.108649503233
Epoch 6 step 1670: training accuarcy: 0.998
Epoch 6 step 1670: training loss: 327.5941052089297
Epoch 6 step 1671: training accuarcy: 0.9955
Epoch 6 step 1671: training loss: 315.35471951055115
Epoch 6 step 1672: training accuarcy: 0.995
Epoch 6 step 1672: training loss: 319.80512298675774
Epoch 6 step 1673: training a

Epoch 6 step 1747: training accuarcy: 0.991
Epoch 6 step 1747: training loss: 339.0494114740451
Epoch 6 step 1748: training accuarcy: 0.9945
Epoch 6 step 1748: training loss: 334.21966006896344
Epoch 6 step 1749: training accuarcy: 0.9965
Epoch 6 step 1749: training loss: 344.9077539813636
Epoch 6 step 1750: training accuarcy: 0.995
Epoch 6 step 1750: training loss: 322.35832226725796
Epoch 6 step 1751: training accuarcy: 0.997
Epoch 6 step 1751: training loss: 329.32460510797637
Epoch 6 step 1752: training accuarcy: 0.9975
Epoch 6 step 1752: training loss: 338.50033102981115
Epoch 6 step 1753: training accuarcy: 0.9945
Epoch 6 step 1753: training loss: 320.93068927961366
Epoch 6 step 1754: training accuarcy: 0.997
Epoch 6 step 1754: training loss: 331.932300544874
Epoch 6 step 1755: training accuarcy: 0.9955
Epoch 6 step 1755: training loss: 320.04520426288104
Epoch 6 step 1756: training accuarcy: 0.9955
Epoch 6 step 1756: training loss: 326.3571632940833
Epoch 6 step 1757: training a

Epoch 6 step 1831: training loss: 333.19700528619444
Epoch 6 step 1832: training accuarcy: 0.9965
Epoch 6 step 1832: training loss: 324.19385579663594
Epoch 6 step 1833: training accuarcy: 0.9985
Epoch 6 step 1833: training loss: 327.84905327590195
Epoch 6 step 1834: training accuarcy: 0.9955
Epoch 6 step 1834: training loss: 330.01950764959605
Epoch 6 step 1835: training accuarcy: 0.9955
Epoch 6 step 1835: training loss: 322.93696679825666
Epoch 6 step 1836: training accuarcy: 0.996
Epoch 6 step 1836: training loss: 333.40553465839264
Epoch 6 step 1837: training accuarcy: 0.997
Epoch 6 step 1837: training loss: 329.8539937748701
Epoch 6 step 1838: training accuarcy: 0.996
Epoch 6 step 1838: training loss: 320.8889296170819
Epoch 6 step 1839: training accuarcy: 0.9975
Epoch 6 step 1839: training loss: 321.0241347919924
Epoch 6 step 1840: training accuarcy: 0.9985
Epoch 6 step 1840: training loss: 240.53445059041778
Epoch 6 step 1841: training accuarcy: 1.0
Epoch 6: train loss 327.12506

 88%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████                   | 7/8 [13:33<01:53, 113.67s/it]

Epoch: 7
Epoch 7 step 1841: training loss: 329.3220929687893
Epoch 7 step 1842: training accuarcy: 0.993
Epoch 7 step 1842: training loss: 316.8665933907949
Epoch 7 step 1843: training accuarcy: 0.9955
Epoch 7 step 1843: training loss: 309.30408222590887
Epoch 7 step 1844: training accuarcy: 0.9965
Epoch 7 step 1844: training loss: 316.34608045385573
Epoch 7 step 1845: training accuarcy: 0.998
Epoch 7 step 1845: training loss: 319.88587668972855
Epoch 7 step 1846: training accuarcy: 0.998
Epoch 7 step 1846: training loss: 321.54724121306856
Epoch 7 step 1847: training accuarcy: 0.9985
Epoch 7 step 1847: training loss: 312.80154667536164
Epoch 7 step 1848: training accuarcy: 0.9975
Epoch 7 step 1848: training loss: 319.02496557633333
Epoch 7 step 1849: training accuarcy: 0.9965
Epoch 7 step 1849: training loss: 311.1567180435996
Epoch 7 step 1850: training accuarcy: 0.9975
Epoch 7 step 1850: training loss: 313.3573495488681
Epoch 7 step 1851: training accuarcy: 0.997
Epoch 7 step 1851: 

Epoch 7 step 1925: training loss: 332.9843082352826
Epoch 7 step 1926: training accuarcy: 0.994
Epoch 7 step 1926: training loss: 331.4446077903759
Epoch 7 step 1927: training accuarcy: 0.9945
Epoch 7 step 1927: training loss: 325.7790233012583
Epoch 7 step 1928: training accuarcy: 0.9955
Epoch 7 step 1928: training loss: 344.44421882884114
Epoch 7 step 1929: training accuarcy: 0.9915
Epoch 7 step 1929: training loss: 333.33931172099693
Epoch 7 step 1930: training accuarcy: 0.995
Epoch 7 step 1930: training loss: 320.5830591366987
Epoch 7 step 1931: training accuarcy: 0.998
Epoch 7 step 1931: training loss: 335.8779391274442
Epoch 7 step 1932: training accuarcy: 0.9945
Epoch 7 step 1932: training loss: 316.46374005818694
Epoch 7 step 1933: training accuarcy: 0.9965
Epoch 7 step 1933: training loss: 334.56897552039834
Epoch 7 step 1934: training accuarcy: 0.9965
Epoch 7 step 1934: training loss: 323.9347197086603
Epoch 7 step 1935: training accuarcy: 0.994
Epoch 7 step 1935: training lo

Epoch 7 step 2010: training accuarcy: 0.9955
Epoch 7 step 2010: training loss: 328.935751366856
Epoch 7 step 2011: training accuarcy: 0.996
Epoch 7 step 2011: training loss: 325.5385763191102
Epoch 7 step 2012: training accuarcy: 0.9975
Epoch 7 step 2012: training loss: 337.1156319602303
Epoch 7 step 2013: training accuarcy: 0.995
Epoch 7 step 2013: training loss: 326.8206010556179
Epoch 7 step 2014: training accuarcy: 0.9955
Epoch 7 step 2014: training loss: 318.550511375767
Epoch 7 step 2015: training accuarcy: 0.996
Epoch 7 step 2015: training loss: 324.5610541237395
Epoch 7 step 2016: training accuarcy: 0.998
Epoch 7 step 2016: training loss: 336.9042692020139
Epoch 7 step 2017: training accuarcy: 0.995
Epoch 7 step 2017: training loss: 331.64046277004104
Epoch 7 step 2018: training accuarcy: 0.995
Epoch 7 step 2018: training loss: 327.00539135199955
Epoch 7 step 2019: training accuarcy: 0.9985
Epoch 7 step 2019: training loss: 330.69742573703854
Epoch 7 step 2020: training accuarc

Epoch 7 step 2094: training loss: 323.4826474826256
Epoch 7 step 2095: training accuarcy: 0.997
Epoch 7 step 2095: training loss: 333.4686184369758
Epoch 7 step 2096: training accuarcy: 0.9965
Epoch 7 step 2096: training loss: 339.2355380450729
Epoch 7 step 2097: training accuarcy: 0.9975
Epoch 7 step 2097: training loss: 341.9979638092247
Epoch 7 step 2098: training accuarcy: 0.995
Epoch 7 step 2098: training loss: 330.81652240053904
Epoch 7 step 2099: training accuarcy: 0.995
Epoch 7 step 2099: training loss: 319.15309088569927
Epoch 7 step 2100: training accuarcy: 0.9945
Epoch 7 step 2100: training loss: 312.6963901203337
Epoch 7 step 2101: training accuarcy: 0.9965
Epoch 7 step 2101: training loss: 321.20695039244686
Epoch 7 step 2102: training accuarcy: 0.9965
Epoch 7 step 2102: training loss: 317.5384766887032
Epoch 7 step 2103: training accuarcy: 0.9975
Epoch 7 step 2103: training loss: 242.85923543437607
Epoch 7 step 2104: training accuarcy: 0.9910256410256411
Epoch 7: train lo

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 8/8 [15:26<00:00, 113.57s/it]


In [22]:
# Unbind the notebook-level name of the previously trained FM model so it can
# be garbage-collected before the next (HRM) model is built.
# NOTE(review): any other object still referencing the model (e.g. a learner or
# optimizer created around it) would keep its tensors alive — confirm.
# Re-running this cell after it has executed raises NameError, because the
# name no longer exists.
del fm_model
# Ask PyTorch's CUDA caching allocator to release cached memory blocks that
# are no longer occupied by live tensors.
T.cuda.empty_cache()

### Train HRM FM Model

In [23]:
# Build the HRM flavour of the factorization machine, sized from the dataset's
# feature dimension and the NUM_DIM / INIT_MEAN settings of the config cell.
hrm_model = TorchHrmFM(
    feature_dim=feat_dim,
    num_dim=NUM_DIM,
    init_mean=INIT_MEAN,
)
# Bare expression: its repr becomes the cell output.
hrm_model

TorchHrmFM()

In [24]:
# Adam optimizer over the HRM model's parameters, using the notebook-level
# LEARNING_RATE constant.
adam_opt = optim.Adam(
    params=hrm_model.parameters(),
    lr=LEARNING_RATE,
)
# Step-wise decay: StepLR multiplies the learning rate by DECAY_GAMMA once
# every DECAY_FREQ scheduler steps.
schedular = optim.lr_scheduler.StepLR(
    optimizer=adam_opt,
    step_size=DECAY_FREQ,
    gamma=DECAY_GAMMA,
)

In [25]:
# Bundle the HRM model, its optimizer, the LR scheduler and the Topcoder
# dataset object into a single learner.
hrm_learner = FMLearner(
    hrm_model,
    adam_opt,
    schedular,
    db,
)
# Bare expression: its repr becomes the cell output.
hrm_learner

<models.fm_learner.FMLearner at 0x1f20046bcc0>

In [26]:
# Train the HRM learner for 8 epochs with the simple-loss callback; the log
# directory is obtained from get_log_dir for the ('topcoder', 'hrm') pair.
hrm_learner.fit(
    epoch=8,
    loss_callback=simple_loss_callback,
    log_dir=get_log_dir('topcoder', 'hrm'),
)

  0%|                                                                                                                                                                 | 0/8 [00:00<?, ?it/s]

Epoch: 0
Epoch 0 step 0: training loss: 37508.48394862066
Epoch 0 step 1: training accuarcy: 0.489
Epoch 0 step 1: training loss: 36442.54428888903
Epoch 0 step 2: training accuarcy: 0.49
Epoch 0 step 2: training loss: 35401.56785084879
Epoch 0 step 3: training accuarcy: 0.4995
Epoch 0 step 3: training loss: 34378.68944804763
Epoch 0 step 4: training accuarcy: 0.5035000000000001
Epoch 0 step 4: training loss: 33382.10234287656
Epoch 0 step 5: training accuarcy: 0.5075000000000001
Epoch 0 step 5: training loss: 32409.860371521718
Epoch 0 step 6: training accuarcy: 0.504
Epoch 0 step 6: training loss: 31457.042214567897
Epoch 0 step 7: training accuarcy: 0.496
Epoch 0 step 7: training loss: 30524.22628953661
Epoch 0 step 8: training accuarcy: 0.496
Epoch 0 step 8: training loss: 29613.44862392991
Epoch 0 step 9: training accuarcy: 0.5225
Epoch 0 step 9: training loss: 28726.668180471803
Epoch 0 step 10: training accuarcy: 0.5075000000000001
Epoch 0 step 10: training loss: 27861.619561225

Epoch 0 step 87: training loss: 2623.9082366251396
Epoch 0 step 88: training accuarcy: 0.5555
Epoch 0 step 88: training loss: 2566.6791349567147
Epoch 0 step 89: training accuarcy: 0.5655
Epoch 0 step 89: training loss: 2512.556097300625
Epoch 0 step 90: training accuarcy: 0.5640000000000001
Epoch 0 step 90: training loss: 2461.505345053759
Epoch 0 step 91: training accuarcy: 0.578
Epoch 0 step 91: training loss: 2411.553838109179
Epoch 0 step 92: training accuarcy: 0.551
Epoch 0 step 92: training loss: 2364.202697669666
Epoch 0 step 93: training accuarcy: 0.5565
Epoch 0 step 93: training loss: 2317.773404728965
Epoch 0 step 94: training accuarcy: 0.5690000000000001
Epoch 0 step 94: training loss: 2276.52178179392
Epoch 0 step 95: training accuarcy: 0.5235
Epoch 0 step 95: training loss: 2232.1307986384318
Epoch 0 step 96: training accuarcy: 0.5630000000000001
Epoch 0 step 96: training loss: 2192.625100020993
Epoch 0 step 97: training accuarcy: 0.5640000000000001
Epoch 0 step 97: train

Epoch 0 step 172: training accuarcy: 0.6435
Epoch 0 step 172: training loss: 1395.9451786591947
Epoch 0 step 173: training accuarcy: 0.6495
Epoch 0 step 173: training loss: 1395.700017244339
Epoch 0 step 174: training accuarcy: 0.633
Epoch 0 step 174: training loss: 1394.4504209212423
Epoch 0 step 175: training accuarcy: 0.6525
Epoch 0 step 175: training loss: 1394.3784052799422
Epoch 0 step 176: training accuarcy: 0.633
Epoch 0 step 176: training loss: 1393.6568341847271
Epoch 0 step 177: training accuarcy: 0.6415
Epoch 0 step 177: training loss: 1392.4720800108526
Epoch 0 step 178: training accuarcy: 0.669
Epoch 0 step 178: training loss: 1391.5860948089157
Epoch 0 step 179: training accuarcy: 0.6575
Epoch 0 step 179: training loss: 1391.6421523297481
Epoch 0 step 180: training accuarcy: 0.6375000000000001
Epoch 0 step 180: training loss: 1391.7604724936352
Epoch 0 step 181: training accuarcy: 0.644
Epoch 0 step 181: training loss: 1390.3580626694768
Epoch 0 step 182: training accuar

Epoch 0 step 257: training loss: 1383.2192283649413
Epoch 0 step 258: training accuarcy: 0.6910000000000001
Epoch 0 step 258: training loss: 1383.1231538035497
Epoch 0 step 259: training accuarcy: 0.6945
Epoch 0 step 259: training loss: 1382.7370542033436
Epoch 0 step 260: training accuarcy: 0.6945
Epoch 0 step 260: training loss: 1383.1388806180219
Epoch 0 step 261: training accuarcy: 0.6950000000000001
Epoch 0 step 261: training loss: 1383.837933860504
Epoch 0 step 262: training accuarcy: 0.682
Epoch 0 step 262: training loss: 542.5687598480471
Epoch 0 step 263: training accuarcy: 0.7
Epoch 0: train loss 5339.627408337506, train accuarcy 0.6063984632492065
Epoch 0: valid loss 1365.690356413305, valid accuarcy 0.7049726843833923


 12%|███████████████████                                                                                                                                     | 1/8 [01:57<13:40, 117.24s/it]

Epoch: 1
Epoch 1 step 263: training loss: 1382.2335485599704
Epoch 1 step 264: training accuarcy: 0.7145
Epoch 1 step 264: training loss: 1382.9602796375345
Epoch 1 step 265: training accuarcy: 0.6935
Epoch 1 step 265: training loss: 1382.3812017308117
Epoch 1 step 266: training accuarcy: 0.6970000000000001
Epoch 1 step 266: training loss: 1382.3396746286323
Epoch 1 step 267: training accuarcy: 0.7055
Epoch 1 step 267: training loss: 1382.8357907608686
Epoch 1 step 268: training accuarcy: 0.6910000000000001
Epoch 1 step 268: training loss: 1383.0191315930865
Epoch 1 step 269: training accuarcy: 0.6960000000000001
Epoch 1 step 269: training loss: 1382.7444156974543
Epoch 1 step 270: training accuarcy: 0.7035
Epoch 1 step 270: training loss: 1382.4613726292046
Epoch 1 step 271: training accuarcy: 0.712
Epoch 1 step 271: training loss: 1382.3746117934768
Epoch 1 step 272: training accuarcy: 0.713
Epoch 1 step 272: training loss: 1381.9780242194338
Epoch 1 step 273: training accuarcy: 0.71

Epoch 1 step 346: training accuarcy: 0.7030000000000001
Epoch 1 step 346: training loss: 1383.0737209036376
Epoch 1 step 347: training accuarcy: 0.6960000000000001
Epoch 1 step 347: training loss: 1382.6014479690425
Epoch 1 step 348: training accuarcy: 0.711
Epoch 1 step 348: training loss: 1383.448127953567
Epoch 1 step 349: training accuarcy: 0.6865
Epoch 1 step 349: training loss: 1382.8779757852094
Epoch 1 step 350: training accuarcy: 0.6955
Epoch 1 step 350: training loss: 1382.9031838050273
Epoch 1 step 351: training accuarcy: 0.6935
Epoch 1 step 351: training loss: 1382.0983419460795
Epoch 1 step 352: training accuarcy: 0.7135
Epoch 1 step 352: training loss: 1382.3576242372903
Epoch 1 step 353: training accuarcy: 0.6955
Epoch 1 step 353: training loss: 1382.0952845453785
Epoch 1 step 354: training accuarcy: 0.714
Epoch 1 step 354: training loss: 1383.0207625291544
Epoch 1 step 355: training accuarcy: 0.687
Epoch 1 step 355: training loss: 1382.7337704577392
Epoch 1 step 356: tr

Epoch 1 step 427: training loss: 1383.3466869210233
Epoch 1 step 428: training accuarcy: 0.6815
Epoch 1 step 428: training loss: 1383.741513599892
Epoch 1 step 429: training accuarcy: 0.6875
Epoch 1 step 429: training loss: 1383.1631343863094
Epoch 1 step 430: training accuarcy: 0.6995
Epoch 1 step 430: training loss: 1383.1306379368655
Epoch 1 step 431: training accuarcy: 0.6885
Epoch 1 step 431: training loss: 1381.902200007858
Epoch 1 step 432: training accuarcy: 0.6975
Epoch 1 step 432: training loss: 1383.310444263779
Epoch 1 step 433: training accuarcy: 0.705
Epoch 1 step 433: training loss: 1384.044912644757
Epoch 1 step 434: training accuarcy: 0.6745
Epoch 1 step 434: training loss: 1383.2501243331633
Epoch 1 step 435: training accuarcy: 0.6940000000000001
Epoch 1 step 435: training loss: 1382.6929187817564
Epoch 1 step 436: training accuarcy: 0.7135
Epoch 1 step 436: training loss: 1383.4889490217479
Epoch 1 step 437: training accuarcy: 0.6880000000000001
Epoch 1 step 437: tra

Epoch 1 step 510: training loss: 1383.2182823765247
Epoch 1 step 511: training accuarcy: 0.6890000000000001
Epoch 1 step 511: training loss: 1382.0845721749126
Epoch 1 step 512: training accuarcy: 0.7125
Epoch 1 step 512: training loss: 1383.2221426235692
Epoch 1 step 513: training accuarcy: 0.6960000000000001
Epoch 1 step 513: training loss: 1382.761634046808
Epoch 1 step 514: training accuarcy: 0.713
Epoch 1 step 514: training loss: 1382.4995598793453
Epoch 1 step 515: training accuarcy: 0.7020000000000001
Epoch 1 step 515: training loss: 1382.8448034510623
Epoch 1 step 516: training accuarcy: 0.6955
Epoch 1 step 516: training loss: 1383.624820750143
Epoch 1 step 517: training accuarcy: 0.6835
Epoch 1 step 517: training loss: 1382.6092741058642
Epoch 1 step 518: training accuarcy: 0.7035
Epoch 1 step 518: training loss: 1382.8900977488643
Epoch 1 step 519: training accuarcy: 0.6960000000000001
Epoch 1 step 519: training loss: 1383.3443141860794
Epoch 1 step 520: training accuarcy: 0.

 25%|██████████████████████████████████████                                                                                                                  | 2/8 [03:53<11:41, 116.99s/it]

Epoch: 2
Epoch 2 step 526: training loss: 1380.687579611831
Epoch 2 step 527: training accuarcy: 0.726
Epoch 2 step 527: training loss: 1381.6223539363955
Epoch 2 step 528: training accuarcy: 0.718
Epoch 2 step 528: training loss: 1382.3753080623578
Epoch 2 step 529: training accuarcy: 0.7075
Epoch 2 step 529: training loss: 1380.9742153913714
Epoch 2 step 530: training accuarcy: 0.7365
Epoch 2 step 530: training loss: 1382.264849161149
Epoch 2 step 531: training accuarcy: 0.719
Epoch 2 step 531: training loss: 1382.0788115044886
Epoch 2 step 532: training accuarcy: 0.7205
Epoch 2 step 532: training loss: 1382.347236856899
Epoch 2 step 533: training accuarcy: 0.7135
Epoch 2 step 533: training loss: 1382.0310448776056
Epoch 2 step 534: training accuarcy: 0.7125
Epoch 2 step 534: training loss: 1381.951551471578
Epoch 2 step 535: training accuarcy: 0.717
Epoch 2 step 535: training loss: 1383.1778070658934
Epoch 2 step 536: training accuarcy: 0.714
Epoch 2 step 536: training loss: 1382.11

Epoch 2 step 610: training accuarcy: 0.6995
Epoch 2 step 610: training loss: 1382.8919874299904
Epoch 2 step 611: training accuarcy: 0.6995
Epoch 2 step 611: training loss: 1382.8247351634031
Epoch 2 step 612: training accuarcy: 0.7010000000000001
Epoch 2 step 612: training loss: 1382.9986814176298
Epoch 2 step 613: training accuarcy: 0.6920000000000001
Epoch 2 step 613: training loss: 1382.9701774573261
Epoch 2 step 614: training accuarcy: 0.6990000000000001
Epoch 2 step 614: training loss: 1382.219830732438
Epoch 2 step 615: training accuarcy: 0.7000000000000001
Epoch 2 step 615: training loss: 1382.2563906466617
Epoch 2 step 616: training accuarcy: 0.7065
Epoch 2 step 616: training loss: 1383.0526136180931
Epoch 2 step 617: training accuarcy: 0.6935
Epoch 2 step 617: training loss: 1382.526183047369
Epoch 2 step 618: training accuarcy: 0.704
Epoch 2 step 618: training loss: 1382.515421642378
Epoch 2 step 619: training accuarcy: 0.7145
Epoch 2 step 619: training loss: 1383.3689870500

Epoch 2 step 692: training accuarcy: 0.709
Epoch 2 step 692: training loss: 1382.5589543374374
Epoch 2 step 693: training accuarcy: 0.6980000000000001
Epoch 2 step 693: training loss: 1383.364360076782
Epoch 2 step 694: training accuarcy: 0.6885
Epoch 2 step 694: training loss: 1382.750101116641
Epoch 2 step 695: training accuarcy: 0.6795
Epoch 2 step 695: training loss: 1382.4020238429368
Epoch 2 step 696: training accuarcy: 0.7020000000000001
Epoch 2 step 696: training loss: 1382.924683947045
Epoch 2 step 697: training accuarcy: 0.7010000000000001
Epoch 2 step 697: training loss: 1383.3500682963434
Epoch 2 step 698: training accuarcy: 0.6895
Epoch 2 step 698: training loss: 1383.96372903708
Epoch 2 step 699: training accuarcy: 0.6785
Epoch 2 step 699: training loss: 1383.542361172814
Epoch 2 step 700: training accuarcy: 0.6925
Epoch 2 step 700: training loss: 1383.2132949124157
Epoch 2 step 701: training accuarcy: 0.6975
Epoch 2 step 701: training loss: 1383.1017033906824
Epoch 2 ste

Epoch 2 step 775: training accuarcy: 0.6895
Epoch 2 step 775: training loss: 1382.7478025617104
Epoch 2 step 776: training accuarcy: 0.6970000000000001
Epoch 2 step 776: training loss: 1383.6289528385523
Epoch 2 step 777: training accuarcy: 0.684
Epoch 2 step 777: training loss: 1382.1087440938288
Epoch 2 step 778: training accuarcy: 0.7145
Epoch 2 step 778: training loss: 1382.6784404400473
Epoch 2 step 779: training accuarcy: 0.6940000000000001
Epoch 2 step 779: training loss: 1382.5811290413974
Epoch 2 step 780: training accuarcy: 0.6925
Epoch 2 step 780: training loss: 1382.85629720585
Epoch 2 step 781: training accuarcy: 0.6940000000000001
Epoch 2 step 781: training loss: 1383.7141193619436
Epoch 2 step 782: training accuarcy: 0.6905
Epoch 2 step 782: training loss: 1382.4635407077728
Epoch 2 step 783: training accuarcy: 0.6975
Epoch 2 step 783: training loss: 1383.3754000244826
Epoch 2 step 784: training accuarcy: 0.6920000000000001
Epoch 2 step 784: training loss: 1381.918467200

 38%|█████████████████████████████████████████████████████████                                                                                               | 3/8 [05:50<09:44, 116.90s/it]

Epoch: 3
Epoch 3 step 789: training loss: 1381.359329905503
Epoch 3 step 790: training accuarcy: 0.721
Epoch 3 step 790: training loss: 1381.4663453497642
Epoch 3 step 791: training accuarcy: 0.7285
Epoch 3 step 791: training loss: 1381.6574088676011
Epoch 3 step 792: training accuarcy: 0.7265
Epoch 3 step 792: training loss: 1382.0332433024298
Epoch 3 step 793: training accuarcy: 0.7135
Epoch 3 step 793: training loss: 1382.277451512998
Epoch 3 step 794: training accuarcy: 0.709
Epoch 3 step 794: training loss: 1382.2713160837393
Epoch 3 step 795: training accuarcy: 0.7135
Epoch 3 step 795: training loss: 1381.6934251096998
Epoch 3 step 796: training accuarcy: 0.715
Epoch 3 step 796: training loss: 1382.816108734426
Epoch 3 step 797: training accuarcy: 0.6975
Epoch 3 step 797: training loss: 1381.3050231064317
Epoch 3 step 798: training accuarcy: 0.717
Epoch 3 step 798: training loss: 1381.1817076043144
Epoch 3 step 799: training accuarcy: 0.7405
Epoch 3 step 799: training loss: 1381.

Epoch 3 step 872: training accuarcy: 0.6990000000000001
Epoch 3 step 872: training loss: 1382.8342172630805
Epoch 3 step 873: training accuarcy: 0.6890000000000001
Epoch 3 step 873: training loss: 1382.408737870862
Epoch 3 step 874: training accuarcy: 0.6910000000000001
Epoch 3 step 874: training loss: 1382.7852380022237
Epoch 3 step 875: training accuarcy: 0.722
Epoch 3 step 875: training loss: 1382.8981096418468
Epoch 3 step 876: training accuarcy: 0.7015
Epoch 3 step 876: training loss: 1383.094732449932
Epoch 3 step 877: training accuarcy: 0.7065
Epoch 3 step 877: training loss: 1382.9133699775402
Epoch 3 step 878: training accuarcy: 0.6985
Epoch 3 step 878: training loss: 1383.3511092711428
Epoch 3 step 879: training accuarcy: 0.7015
Epoch 3 step 879: training loss: 1382.8888366873455
Epoch 3 step 880: training accuarcy: 0.6965
Epoch 3 step 880: training loss: 1383.2441821626264
Epoch 3 step 881: training accuarcy: 0.6915
Epoch 3 step 881: training loss: 1382.2240952187592
Epoch 3

Epoch 3 step 955: training accuarcy: 0.6950000000000001
Epoch 3 step 955: training loss: 1383.2117954111438
Epoch 3 step 956: training accuarcy: 0.686
Epoch 3 step 956: training loss: 1382.2238967924873
Epoch 3 step 957: training accuarcy: 0.7035
Epoch 3 step 957: training loss: 1382.562102176379
Epoch 3 step 958: training accuarcy: 0.7005
Epoch 3 step 958: training loss: 1382.9010781706747
Epoch 3 step 959: training accuarcy: 0.7020000000000001
Epoch 3 step 959: training loss: 1383.313182998501
Epoch 3 step 960: training accuarcy: 0.7035
Epoch 3 step 960: training loss: 1383.4887704805974
Epoch 3 step 961: training accuarcy: 0.6915
Epoch 3 step 961: training loss: 1382.2393600231171
Epoch 3 step 962: training accuarcy: 0.71
Epoch 3 step 962: training loss: 1382.2856009137301
Epoch 3 step 963: training accuarcy: 0.6950000000000001
Epoch 3 step 963: training loss: 1383.094539804581
Epoch 3 step 964: training accuarcy: 0.6920000000000001
Epoch 3 step 964: training loss: 1383.078807304619

Epoch 3 step 1036: training accuarcy: 0.6930000000000001
Epoch 3 step 1036: training loss: 1383.0001820319944
Epoch 3 step 1037: training accuarcy: 0.6960000000000001
Epoch 3 step 1037: training loss: 1382.9766155011955
Epoch 3 step 1038: training accuarcy: 0.7135
Epoch 3 step 1038: training loss: 1383.7114875687118
Epoch 3 step 1039: training accuarcy: 0.6905
Epoch 3 step 1039: training loss: 1382.321061804624
Epoch 3 step 1040: training accuarcy: 0.7095
Epoch 3 step 1040: training loss: 1383.4811931571169
Epoch 3 step 1041: training accuarcy: 0.686
Epoch 3 step 1041: training loss: 1382.7678398539358
Epoch 3 step 1042: training accuarcy: 0.6935
Epoch 3 step 1042: training loss: 1383.1578072251814
Epoch 3 step 1043: training accuarcy: 0.6985
Epoch 3 step 1043: training loss: 1383.43803277923
Epoch 3 step 1044: training accuarcy: 0.682
Epoch 3 step 1044: training loss: 1382.525838068188
Epoch 3 step 1045: training accuarcy: 0.6935
Epoch 3 step 1045: training loss: 1382.6732790175304
Ep

 50%|████████████████████████████████████████████████████████████████████████████                                                                            | 4/8 [07:44<07:43, 115.99s/it]

Epoch: 4
Epoch 4 step 1052: training loss: 1382.0408522295422
Epoch 4 step 1053: training accuarcy: 0.7215
Epoch 4 step 1053: training loss: 1381.8361811680559
Epoch 4 step 1054: training accuarcy: 0.7075
Epoch 4 step 1054: training loss: 1382.1049328887134
Epoch 4 step 1055: training accuarcy: 0.708
Epoch 4 step 1055: training loss: 1382.4317358418919
Epoch 4 step 1056: training accuarcy: 0.707
Epoch 4 step 1056: training loss: 1381.2602350573336
Epoch 4 step 1057: training accuarcy: 0.7145
Epoch 4 step 1057: training loss: 1381.6109753770697
Epoch 4 step 1058: training accuarcy: 0.718
Epoch 4 step 1058: training loss: 1382.4188737273682
Epoch 4 step 1059: training accuarcy: 0.716
Epoch 4 step 1059: training loss: 1382.6446113002419
Epoch 4 step 1060: training accuarcy: 0.7035
Epoch 4 step 1060: training loss: 1382.3513262279919
Epoch 4 step 1061: training accuarcy: 0.709
Epoch 4 step 1061: training loss: 1382.5711978479458
Epoch 4 step 1062: training accuarcy: 0.7145
Epoch 4 step 106

Epoch 4 step 1134: training accuarcy: 0.7075
Epoch 4 step 1134: training loss: 1383.6259785060001
Epoch 4 step 1135: training accuarcy: 0.682
Epoch 4 step 1135: training loss: 1383.0893388955708
Epoch 4 step 1136: training accuarcy: 0.6885
Epoch 4 step 1136: training loss: 1382.5168181051986
Epoch 4 step 1137: training accuarcy: 0.714
Epoch 4 step 1137: training loss: 1381.5929857023032
Epoch 4 step 1138: training accuarcy: 0.6985
Epoch 4 step 1138: training loss: 1382.6321361774842
Epoch 4 step 1139: training accuarcy: 0.7155
Epoch 4 step 1139: training loss: 1383.140800107942
Epoch 4 step 1140: training accuarcy: 0.681
Epoch 4 step 1140: training loss: 1383.1118173601556
Epoch 4 step 1141: training accuarcy: 0.6890000000000001
Epoch 4 step 1141: training loss: 1382.6128432318794
Epoch 4 step 1142: training accuarcy: 0.7005
Epoch 4 step 1142: training loss: 1382.907146712596
Epoch 4 step 1143: training accuarcy: 0.7005
Epoch 4 step 1143: training loss: 1382.5604008036662
Epoch 4 step 

Epoch 4 step 1216: training accuarcy: 0.6835
Epoch 4 step 1216: training loss: 1383.483215047522
Epoch 4 step 1217: training accuarcy: 0.6985
Epoch 4 step 1217: training loss: 1382.7371787746265
Epoch 4 step 1218: training accuarcy: 0.707
Epoch 4 step 1218: training loss: 1382.0514965918985
Epoch 4 step 1219: training accuarcy: 0.719
Epoch 4 step 1219: training loss: 1383.3726297815233
Epoch 4 step 1220: training accuarcy: 0.6875
Epoch 4 step 1220: training loss: 1383.191348447189
Epoch 4 step 1221: training accuarcy: 0.675
Epoch 4 step 1221: training loss: 1383.1023609766974
Epoch 4 step 1222: training accuarcy: 0.6910000000000001
Epoch 4 step 1222: training loss: 1382.7285639705317
Epoch 4 step 1223: training accuarcy: 0.7015
Epoch 4 step 1223: training loss: 1383.240924689382
Epoch 4 step 1224: training accuarcy: 0.6890000000000001
Epoch 4 step 1224: training loss: 1383.5253930049614
Epoch 4 step 1225: training accuarcy: 0.6900000000000001
Epoch 4 step 1225: training loss: 1383.0781

Epoch 4 step 1296: training accuarcy: 0.6940000000000001
Epoch 4 step 1296: training loss: 1382.954776112143
Epoch 4 step 1297: training accuarcy: 0.6905
Epoch 4 step 1297: training loss: 1381.7904610671806
Epoch 4 step 1298: training accuarcy: 0.704
Epoch 4 step 1298: training loss: 1382.2964007470393
Epoch 4 step 1299: training accuarcy: 0.7035
Epoch 4 step 1299: training loss: 1383.2287193772556
Epoch 4 step 1300: training accuarcy: 0.6930000000000001
Epoch 4 step 1300: training loss: 1382.359810497525
Epoch 4 step 1301: training accuarcy: 0.6965
Epoch 4 step 1301: training loss: 1382.0573018849736
Epoch 4 step 1302: training accuarcy: 0.6990000000000001
Epoch 4 step 1302: training loss: 1381.7302304098957
Epoch 4 step 1303: training accuarcy: 0.708
Epoch 4 step 1303: training loss: 1383.1892122366105
Epoch 4 step 1304: training accuarcy: 0.6945
Epoch 4 step 1304: training loss: 1383.231770240961
Epoch 4 step 1305: training accuarcy: 0.6935
Epoch 4 step 1305: training loss: 1384.178

 62%|███████████████████████████████████████████████████████████████████████████████████████████████                                                         | 5/8 [09:36<05:44, 114.95s/it]

Epoch: 5
Epoch 5 step 1315: training loss: 1381.6597103995423
Epoch 5 step 1316: training accuarcy: 0.7205
Epoch 5 step 1316: training loss: 1381.1796691800578
Epoch 5 step 1317: training accuarcy: 0.7225
Epoch 5 step 1317: training loss: 1382.3465677090928
Epoch 5 step 1318: training accuarcy: 0.6990000000000001
Epoch 5 step 1318: training loss: 1382.0706805574287
Epoch 5 step 1319: training accuarcy: 0.719
Epoch 5 step 1319: training loss: 1382.5387034344465
Epoch 5 step 1320: training accuarcy: 0.6995
Epoch 5 step 1320: training loss: 1381.8413893505965
Epoch 5 step 1321: training accuarcy: 0.7145
Epoch 5 step 1321: training loss: 1381.695640758426
Epoch 5 step 1322: training accuarcy: 0.717
Epoch 5 step 1322: training loss: 1381.9190678818773
Epoch 5 step 1323: training accuarcy: 0.7030000000000001
Epoch 5 step 1323: training loss: 1382.6374538043465
Epoch 5 step 1324: training accuarcy: 0.723
Epoch 5 step 1324: training loss: 1382.1536809648735
Epoch 5 step 1325: training accuarcy

Epoch 5 step 1397: training loss: 1382.7945020044776
Epoch 5 step 1398: training accuarcy: 0.6990000000000001
Epoch 5 step 1398: training loss: 1383.6372796714247
Epoch 5 step 1399: training accuarcy: 0.6785
Epoch 5 step 1399: training loss: 1383.2184372498675
Epoch 5 step 1400: training accuarcy: 0.686
Epoch 5 step 1400: training loss: 1383.534652456642
Epoch 5 step 1401: training accuarcy: 0.6875
Epoch 5 step 1401: training loss: 1383.205173735724
Epoch 5 step 1402: training accuarcy: 0.6990000000000001
Epoch 5 step 1402: training loss: 1383.0808009903628
Epoch 5 step 1403: training accuarcy: 0.6960000000000001
Epoch 5 step 1403: training loss: 1382.8344308978928
Epoch 5 step 1404: training accuarcy: 0.7095
Epoch 5 step 1404: training loss: 1383.0246024732446
Epoch 5 step 1405: training accuarcy: 0.6935
Epoch 5 step 1405: training loss: 1382.5396910768975
Epoch 5 step 1406: training accuarcy: 0.7045
Epoch 5 step 1406: training loss: 1382.212720652467
Epoch 5 step 1407: training accua

Epoch 5 step 1478: training accuarcy: 0.7125
Epoch 5 step 1478: training loss: 1383.2708742177786
Epoch 5 step 1479: training accuarcy: 0.6920000000000001
Epoch 5 step 1479: training loss: 1383.3188534063843
Epoch 5 step 1480: training accuarcy: 0.6880000000000001
Epoch 5 step 1480: training loss: 1383.3949534418657
Epoch 5 step 1481: training accuarcy: 0.6970000000000001
Epoch 5 step 1481: training loss: 1382.6985485762311
Epoch 5 step 1482: training accuarcy: 0.7005
Epoch 5 step 1482: training loss: 1382.354370106939
Epoch 5 step 1483: training accuarcy: 0.709
Epoch 5 step 1483: training loss: 1383.5581736544277
Epoch 5 step 1484: training accuarcy: 0.6955
Epoch 5 step 1484: training loss: 1383.1264590856863
Epoch 5 step 1485: training accuarcy: 0.6960000000000001
Epoch 5 step 1485: training loss: 1382.9231519942368
Epoch 5 step 1486: training accuarcy: 0.7020000000000001
Epoch 5 step 1486: training loss: 1382.964920259646
Epoch 5 step 1487: training accuarcy: 0.6930000000000001
Epoc

Epoch 5 step 1558: training loss: 1383.2924504620373
Epoch 5 step 1559: training accuarcy: 0.6940000000000001
Epoch 5 step 1559: training loss: 1383.056869185014
Epoch 5 step 1560: training accuarcy: 0.6920000000000001
Epoch 5 step 1560: training loss: 1382.5950885108387
Epoch 5 step 1561: training accuarcy: 0.6980000000000001
Epoch 5 step 1561: training loss: 1382.2730691926108
Epoch 5 step 1562: training accuarcy: 0.719
Epoch 5 step 1562: training loss: 1382.0466550970325
Epoch 5 step 1563: training accuarcy: 0.712
Epoch 5 step 1563: training loss: 1383.3765520712768
Epoch 5 step 1564: training accuarcy: 0.687
Epoch 5 step 1564: training loss: 1382.581710501581
Epoch 5 step 1565: training accuarcy: 0.6935
Epoch 5 step 1565: training loss: 1382.8522371871832
Epoch 5 step 1566: training accuarcy: 0.6895
Epoch 5 step 1566: training loss: 1383.047410983072
Epoch 5 step 1567: training accuarcy: 0.704
Epoch 5 step 1567: training loss: 1381.6463631597583
Epoch 5 step 1568: training accuarcy

 75%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████                                      | 6/8 [11:30<03:49, 114.67s/it]

Epoch: 6
Epoch 6 step 1578: training loss: 1382.0416807791064
Epoch 6 step 1579: training accuarcy: 0.72
Epoch 6 step 1579: training loss: 1381.6716833373819
Epoch 6 step 1580: training accuarcy: 0.73
Epoch 6 step 1580: training loss: 1382.5053743233568
Epoch 6 step 1581: training accuarcy: 0.7255
Epoch 6 step 1581: training loss: 1381.7095520397102
Epoch 6 step 1582: training accuarcy: 0.711
Epoch 6 step 1582: training loss: 1381.6096645210694
Epoch 6 step 1583: training accuarcy: 0.713
Epoch 6 step 1583: training loss: 1382.1205756783447
Epoch 6 step 1584: training accuarcy: 0.7145
Epoch 6 step 1584: training loss: 1381.3016516701566
Epoch 6 step 1585: training accuarcy: 0.7285
Epoch 6 step 1585: training loss: 1381.761175331917
Epoch 6 step 1586: training accuarcy: 0.7125
Epoch 6 step 1586: training loss: 1382.847758280477
Epoch 6 step 1587: training accuarcy: 0.6900000000000001
Epoch 6 step 1587: training loss: 1381.39671459321
Epoch 6 step 1588: training accuarcy: 0.7085
Epoch 6 s

Epoch 6 step 1660: training loss: 1382.7895176006652
Epoch 6 step 1661: training accuarcy: 0.7030000000000001
Epoch 6 step 1661: training loss: 1383.6638815351837
Epoch 6 step 1662: training accuarcy: 0.687
Epoch 6 step 1662: training loss: 1382.6111148458929
Epoch 6 step 1663: training accuarcy: 0.7085
Epoch 6 step 1663: training loss: 1382.8756824735635
Epoch 6 step 1664: training accuarcy: 0.7025
Epoch 6 step 1664: training loss: 1383.0679182382307
Epoch 6 step 1665: training accuarcy: 0.6985
Epoch 6 step 1665: training loss: 1382.710619387123
Epoch 6 step 1666: training accuarcy: 0.705
Epoch 6 step 1666: training loss: 1383.2180684538018
Epoch 6 step 1667: training accuarcy: 0.6935
Epoch 6 step 1667: training loss: 1382.397610811431
Epoch 6 step 1668: training accuarcy: 0.7105
Epoch 6 step 1668: training loss: 1382.260484270875
Epoch 6 step 1669: training accuarcy: 0.719
Epoch 6 step 1669: training loss: 1383.0546820841757
Epoch 6 step 1670: training accuarcy: 0.6990000000000001
Ep

Epoch 6 step 1741: training loss: 1382.6703116336673
Epoch 6 step 1742: training accuarcy: 0.7105
Epoch 6 step 1742: training loss: 1382.980924562956
Epoch 6 step 1743: training accuarcy: 0.6980000000000001
Epoch 6 step 1743: training loss: 1382.8738779442108
Epoch 6 step 1744: training accuarcy: 0.706
Epoch 6 step 1744: training loss: 1382.8284206021071
Epoch 6 step 1745: training accuarcy: 0.712
Epoch 6 step 1745: training loss: 1382.8851707959138
Epoch 6 step 1746: training accuarcy: 0.7020000000000001
Epoch 6 step 1746: training loss: 1382.7754425562712
Epoch 6 step 1747: training accuarcy: 0.707
Epoch 6 step 1747: training loss: 1382.566759800367
Epoch 6 step 1748: training accuarcy: 0.6950000000000001
Epoch 6 step 1748: training loss: 1383.3280967414867
Epoch 6 step 1749: training accuarcy: 0.6920000000000001
Epoch 6 step 1749: training loss: 1383.262365839473
Epoch 6 step 1750: training accuarcy: 0.6970000000000001
Epoch 6 step 1750: training loss: 1383.5459307362794
Epoch 6 ste

Epoch 6 step 1822: training accuarcy: 0.7055
Epoch 6 step 1822: training loss: 1382.0399816179906
Epoch 6 step 1823: training accuarcy: 0.726
Epoch 6 step 1823: training loss: 1383.5294796207504
Epoch 6 step 1824: training accuarcy: 0.6960000000000001
Epoch 6 step 1824: training loss: 1382.8850290705232
Epoch 6 step 1825: training accuarcy: 0.6935
Epoch 6 step 1825: training loss: 1382.66015932258
Epoch 6 step 1826: training accuarcy: 0.714
Epoch 6 step 1826: training loss: 1383.1702468926608
Epoch 6 step 1827: training accuarcy: 0.6945
Epoch 6 step 1827: training loss: 1383.3619071618496
Epoch 6 step 1828: training accuarcy: 0.6900000000000001
Epoch 6 step 1828: training loss: 1382.6547395403131
Epoch 6 step 1829: training accuarcy: 0.715
Epoch 6 step 1829: training loss: 1383.0993106167298
Epoch 6 step 1830: training accuarcy: 0.7045
Epoch 6 step 1830: training loss: 1383.0868859466816
Epoch 6 step 1831: training accuarcy: 0.6985
Epoch 6 step 1831: training loss: 1383.250118486259
Ep

 88%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████                   | 7/8 [13:32<01:56, 116.73s/it]

Epoch: 7
Epoch 7 step 1841: training loss: 1382.7192701712631
Epoch 7 step 1842: training accuarcy: 0.7020000000000001
Epoch 7 step 1842: training loss: 1382.7357072268237
Epoch 7 step 1843: training accuarcy: 0.7125
Epoch 7 step 1843: training loss: 1382.7605816459884
Epoch 7 step 1844: training accuarcy: 0.71
Epoch 7 step 1844: training loss: 1381.7718497909045
Epoch 7 step 1845: training accuarcy: 0.7125
Epoch 7 step 1845: training loss: 1382.2716846890207
Epoch 7 step 1846: training accuarcy: 0.7075
Epoch 7 step 1846: training loss: 1382.7309138310045
Epoch 7 step 1847: training accuarcy: 0.7145
Epoch 7 step 1847: training loss: 1382.247658817208
Epoch 7 step 1848: training accuarcy: 0.706
Epoch 7 step 1848: training loss: 1381.892354247379
Epoch 7 step 1849: training accuarcy: 0.7105
Epoch 7 step 1849: training loss: 1381.353685694988
Epoch 7 step 1850: training accuarcy: 0.7345
Epoch 7 step 1850: training loss: 1382.7014828403112
Epoch 7 step 1851: training accuarcy: 0.713
Epoch 

Epoch 7 step 1924: training accuarcy: 0.683
Epoch 7 step 1924: training loss: 1382.9508611854221
Epoch 7 step 1925: training accuarcy: 0.6910000000000001
Epoch 7 step 1925: training loss: 1383.3192341240579
Epoch 7 step 1926: training accuarcy: 0.6950000000000001
Epoch 7 step 1926: training loss: 1383.2744461996829
Epoch 7 step 1927: training accuarcy: 0.6980000000000001
Epoch 7 step 1927: training loss: 1381.864013229542
Epoch 7 step 1928: training accuarcy: 0.7145
Epoch 7 step 1928: training loss: 1382.5911852878191
Epoch 7 step 1929: training accuarcy: 0.7005
Epoch 7 step 1929: training loss: 1383.1237428876707
Epoch 7 step 1930: training accuarcy: 0.704
Epoch 7 step 1930: training loss: 1382.2987460881814
Epoch 7 step 1931: training accuarcy: 0.6875
Epoch 7 step 1931: training loss: 1383.2594127302755
Epoch 7 step 1932: training accuarcy: 0.7025
Epoch 7 step 1932: training loss: 1382.8302645735441
Epoch 7 step 1933: training accuarcy: 0.7115
Epoch 7 step 1933: training loss: 1383.1

Epoch 7 step 2005: training loss: 1383.5513405245874
Epoch 7 step 2006: training accuarcy: 0.6970000000000001
Epoch 7 step 2006: training loss: 1382.574918088769
Epoch 7 step 2007: training accuarcy: 0.7015
Epoch 7 step 2007: training loss: 1383.1070883076884
Epoch 7 step 2008: training accuarcy: 0.6845
Epoch 7 step 2008: training loss: 1382.1518069010247
Epoch 7 step 2009: training accuarcy: 0.7010000000000001
Epoch 7 step 2009: training loss: 1383.5022730360529
Epoch 7 step 2010: training accuarcy: 0.6940000000000001
Epoch 7 step 2010: training loss: 1382.775110817881
Epoch 7 step 2011: training accuarcy: 0.683
Epoch 7 step 2011: training loss: 1382.6307904970793
Epoch 7 step 2012: training accuarcy: 0.6970000000000001
Epoch 7 step 2012: training loss: 1383.2086614715845
Epoch 7 step 2013: training accuarcy: 0.6895
Epoch 7 step 2013: training loss: 1383.4964817616806
Epoch 7 step 2014: training accuarcy: 0.6935
Epoch 7 step 2014: training loss: 1383.2985106309293
Epoch 7 step 2015: t

Epoch 7 step 2086: training loss: 1382.7805021995116
Epoch 7 step 2087: training accuarcy: 0.6980000000000001
Epoch 7 step 2087: training loss: 1382.8178241724138
Epoch 7 step 2088: training accuarcy: 0.6880000000000001
Epoch 7 step 2088: training loss: 1382.1992907678505
Epoch 7 step 2089: training accuarcy: 0.708
Epoch 7 step 2089: training loss: 1382.9550620688879
Epoch 7 step 2090: training accuarcy: 0.6915
Epoch 7 step 2090: training loss: 1382.8500328882108
Epoch 7 step 2091: training accuarcy: 0.6965
Epoch 7 step 2091: training loss: 1383.0008869831283
Epoch 7 step 2092: training accuarcy: 0.7075
Epoch 7 step 2092: training loss: 1382.381436803192
Epoch 7 step 2093: training accuarcy: 0.708
Epoch 7 step 2093: training loss: 1383.3437918158495
Epoch 7 step 2094: training accuarcy: 0.6895
Epoch 7 step 2094: training loss: 1382.887745604684
Epoch 7 step 2095: training accuarcy: 0.7085
Epoch 7 step 2095: training loss: 1383.0170676577698
Epoch 7 step 2096: training accuarcy: 0.6925


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 8/8 [15:31<00:00, 117.59s/it]


In [27]:
# Drop this notebook's name for the HRM model, then ask PyTorch's caching
# allocator to release unused cached CUDA memory before the next model is built.
# Order matters: the name must be deleted first so its tensors can become
# unreferenced before the cache is emptied.
# NOTE(review): any other live reference to the model or its parameters
# (e.g. a learner or optimizer object built around it) would keep that GPU
# memory allocated -- confirm those references are released as well.
del hrm_model
T.cuda.empty_cache()

### Train PRME FM Model

In [28]:
# Instantiate the PRME FM model from the notebook-level settings
# (feat_dim, NUM_DIM, INIT_MEAN) and echo its repr as the cell output.
prme_model = TorchPrmeFM(
    feature_dim=feat_dim,
    num_dim=NUM_DIM,
    init_mean=INIT_MEAN,
)
prme_model

TorchPrmeFM()

In [29]:
# Adam over the PRME model's parameters, paired with a StepLR schedule that
# scales the learning rate by DECAY_GAMMA every DECAY_FREQ scheduler steps.
# The names `adam_opt` and `schedular` are kept as-is because the learner
# cell below reads them.
adam_opt = optim.Adam(
    params=prme_model.parameters(),
    lr=LEARNING_RATE,
)
schedular = optim.lr_scheduler.StepLR(
    optimizer=adam_opt,
    step_size=DECAY_FREQ,
    gamma=DECAY_GAMMA,
)

In [30]:
# Bundle the PRME model, its optimizer, the LR scheduler and the data bunch
# into a learner, then echo the learner object as the cell output.
# Arguments stay positional: FMLearner's parameter names are not visible here.
prme_learner = FMLearner(
    prme_model,
    adam_opt,
    schedular,
    db,
)
prme_learner

<models.fm_learner.FMLearner at 0x1f20046b400>

In [31]:
# Train the PRME learner for 8 epochs with the simple-loss callback, logging
# to the directory returned by get_log_dir for ('topcoder', 'prme').
# Heads-up: fit() prints a loss line and an accuracy line for every training
# step, so this cell's output is very long.
prme_learner.fit(
    epoch=8,
    loss_callback=simple_loss_callback,
    log_dir=get_log_dir('topcoder', 'prme'),
)

  0%|                                                                                                                                                                 | 0/8 [00:00<?, ?it/s]

Epoch: 0
Epoch 0 step 0: training loss: 37514.48137421203
Epoch 0 step 1: training accuarcy: 0.527
Epoch 0 step 1: training loss: 36473.747391143785
Epoch 0 step 2: training accuarcy: 0.5055000000000001
Epoch 0 step 2: training loss: 35454.346101311516
Epoch 0 step 3: training accuarcy: 0.5015000000000001
Epoch 0 step 3: training loss: 34454.33436229627
Epoch 0 step 4: training accuarcy: 0.484
Epoch 0 step 4: training loss: 33472.234940401846
Epoch 0 step 5: training accuarcy: 0.5105000000000001
Epoch 0 step 5: training loss: 32522.437981548668
Epoch 0 step 6: training accuarcy: 0.5055000000000001
Epoch 0 step 6: training loss: 31582.993115826408
Epoch 0 step 7: training accuarcy: 0.5135
Epoch 0 step 7: training loss: 30670.02176044439
Epoch 0 step 8: training accuarcy: 0.5155
Epoch 0 step 8: training loss: 29788.87466102316
Epoch 0 step 9: training accuarcy: 0.507
Epoch 0 step 9: training loss: 28914.7417543135
Epoch 0 step 10: training accuarcy: 0.5165
Epoch 0 step 10: training loss:

Epoch 0 step 87: training accuarcy: 0.5925
Epoch 0 step 87: training loss: 2762.5663323613303
Epoch 0 step 88: training accuarcy: 0.599
Epoch 0 step 88: training loss: 2706.2634834567843
Epoch 0 step 89: training accuarcy: 0.5640000000000001
Epoch 0 step 89: training loss: 2644.5764398264546
Epoch 0 step 90: training accuarcy: 0.5945
Epoch 0 step 90: training loss: 2588.3158711829783
Epoch 0 step 91: training accuarcy: 0.6055
Epoch 0 step 91: training loss: 2536.562902300486
Epoch 0 step 92: training accuarcy: 0.5885
Epoch 0 step 92: training loss: 2486.265884515271
Epoch 0 step 93: training accuarcy: 0.5915
Epoch 0 step 93: training loss: 2435.0182627281147
Epoch 0 step 94: training accuarcy: 0.6
Epoch 0 step 94: training loss: 2389.118089535354
Epoch 0 step 95: training accuarcy: 0.5975
Epoch 0 step 95: training loss: 2344.12374985393
Epoch 0 step 96: training accuarcy: 0.583
Epoch 0 step 96: training loss: 2300.0552278803657
Epoch 0 step 97: training accuarcy: 0.5915
Epoch 0 step 97

Epoch 0 step 171: training loss: 1403.2463424379605
Epoch 0 step 172: training accuarcy: 0.64
Epoch 0 step 172: training loss: 1402.6984465602586
Epoch 0 step 173: training accuarcy: 0.6355000000000001
Epoch 0 step 173: training loss: 1400.553143916913
Epoch 0 step 174: training accuarcy: 0.66
Epoch 0 step 174: training loss: 1400.4041205164006
Epoch 0 step 175: training accuarcy: 0.648
Epoch 0 step 175: training loss: 1397.8913055722246
Epoch 0 step 176: training accuarcy: 0.662
Epoch 0 step 176: training loss: 1397.7636800453208
Epoch 0 step 177: training accuarcy: 0.6465
Epoch 0 step 177: training loss: 1397.2701884359565
Epoch 0 step 178: training accuarcy: 0.655
Epoch 0 step 178: training loss: 1395.899843048871
Epoch 0 step 179: training accuarcy: 0.6735
Epoch 0 step 179: training loss: 1395.2441839108803
Epoch 0 step 180: training accuarcy: 0.6715
Epoch 0 step 180: training loss: 1395.9703598188728
Epoch 0 step 181: training accuarcy: 0.646
Epoch 0 step 181: training loss: 1394.

Epoch 0 step 256: training accuarcy: 0.673
Epoch 0 step 256: training loss: 1382.9072025880077
Epoch 0 step 257: training accuarcy: 0.6890000000000001
Epoch 0 step 257: training loss: 1383.3938732460037
Epoch 0 step 258: training accuarcy: 0.6945
Epoch 0 step 258: training loss: 1383.6231843877981
Epoch 0 step 259: training accuarcy: 0.7035
Epoch 0 step 259: training loss: 1383.1637076390673
Epoch 0 step 260: training accuarcy: 0.683
Epoch 0 step 260: training loss: 1383.6408113188377
Epoch 0 step 261: training accuarcy: 0.6915
Epoch 0 step 261: training loss: 1383.8668465713042
Epoch 0 step 262: training accuarcy: 0.6755
Epoch 0 step 262: training loss: 543.4043466801741
Epoch 0 step 263: training accuarcy: 0.6948717948717948
Epoch 0: train loss 5440.148758324528, train accuarcy 0.6237958073616028
Epoch 0: valid loss 1364.5724493932605, valid accuarcy 0.7122498750686646


 12%|███████████████████                                                                                                                                     | 1/8 [01:56<13:35, 116.46s/it]

Epoch: 1
Epoch 1 step 263: training loss: 1381.9288631656204
Epoch 1 step 264: training accuarcy: 0.717
Epoch 1 step 264: training loss: 1382.8740386957263
Epoch 1 step 265: training accuarcy: 0.7205
Epoch 1 step 265: training loss: 1383.163731968013
Epoch 1 step 266: training accuarcy: 0.6905
Epoch 1 step 266: training loss: 1382.8545891421277
Epoch 1 step 267: training accuarcy: 0.6885
Epoch 1 step 267: training loss: 1382.4083080815135
Epoch 1 step 268: training accuarcy: 0.6975
Epoch 1 step 268: training loss: 1382.9286292596848
Epoch 1 step 269: training accuarcy: 0.7055
Epoch 1 step 269: training loss: 1382.8100783364525
Epoch 1 step 270: training accuarcy: 0.713
Epoch 1 step 270: training loss: 1383.6546622408448
Epoch 1 step 271: training accuarcy: 0.707
Epoch 1 step 271: training loss: 1382.8825608359637
Epoch 1 step 272: training accuarcy: 0.705
Epoch 1 step 272: training loss: 1383.230800394471
Epoch 1 step 273: training accuarcy: 0.7010000000000001
Epoch 1 step 273: trainin

Epoch 1 step 347: training loss: 1383.0618994443694
Epoch 1 step 348: training accuarcy: 0.6920000000000001
Epoch 1 step 348: training loss: 1382.913775212559
Epoch 1 step 349: training accuarcy: 0.7135
Epoch 1 step 349: training loss: 1382.9219114150665
Epoch 1 step 350: training accuarcy: 0.676
Epoch 1 step 350: training loss: 1382.7557897761963
Epoch 1 step 351: training accuarcy: 0.6980000000000001
Epoch 1 step 351: training loss: 1383.6413111256625
Epoch 1 step 352: training accuarcy: 0.7025
Epoch 1 step 352: training loss: 1382.9694863399923
Epoch 1 step 353: training accuarcy: 0.6900000000000001
Epoch 1 step 353: training loss: 1383.6165630754715
Epoch 1 step 354: training accuarcy: 0.681
Epoch 1 step 354: training loss: 1382.7907732011336
Epoch 1 step 355: training accuarcy: 0.706
Epoch 1 step 355: training loss: 1382.8123922637292
Epoch 1 step 356: training accuarcy: 0.6995
Epoch 1 step 356: training loss: 1382.4637338122902
Epoch 1 step 357: training accuarcy: 0.7055
Epoch 1 

Epoch 1 step 431: training accuarcy: 0.687
Epoch 1 step 431: training loss: 1383.544529975551
Epoch 1 step 432: training accuarcy: 0.6935
Epoch 1 step 432: training loss: 1382.7406523463846
Epoch 1 step 433: training accuarcy: 0.705
Epoch 1 step 433: training loss: 1383.8302853839634
Epoch 1 step 434: training accuarcy: 0.685
Epoch 1 step 434: training loss: 1382.263804137278
Epoch 1 step 435: training accuarcy: 0.7065
Epoch 1 step 435: training loss: 1383.2367647967167
Epoch 1 step 436: training accuarcy: 0.7015
Epoch 1 step 436: training loss: 1382.6569232613456
Epoch 1 step 437: training accuarcy: 0.6980000000000001
Epoch 1 step 437: training loss: 1382.3876202297586
Epoch 1 step 438: training accuarcy: 0.6900000000000001
Epoch 1 step 438: training loss: 1382.6201850273494
Epoch 1 step 439: training accuarcy: 0.7025
Epoch 1 step 439: training loss: 1383.1003555990235
Epoch 1 step 440: training accuarcy: 0.6900000000000001
Epoch 1 step 440: training loss: 1383.0934235754376
Epoch 1 s

Epoch 1 step 513: training loss: 1383.1139720090362
Epoch 1 step 514: training accuarcy: 0.6895
Epoch 1 step 514: training loss: 1383.1273729447241
Epoch 1 step 515: training accuarcy: 0.6885
Epoch 1 step 515: training loss: 1383.5136744847298
Epoch 1 step 516: training accuarcy: 0.6960000000000001
Epoch 1 step 516: training loss: 1382.0365476969548
Epoch 1 step 517: training accuarcy: 0.7185
Epoch 1 step 517: training loss: 1383.3986803261205
Epoch 1 step 518: training accuarcy: 0.6915
Epoch 1 step 518: training loss: 1382.1109219179843
Epoch 1 step 519: training accuarcy: 0.716
Epoch 1 step 519: training loss: 1383.17402704786
Epoch 1 step 520: training accuarcy: 0.6900000000000001
Epoch 1 step 520: training loss: 1382.6616721640212
Epoch 1 step 521: training accuarcy: 0.6995
Epoch 1 step 521: training loss: 1382.4970014507155
Epoch 1 step 522: training accuarcy: 0.7020000000000001
Epoch 1 step 522: training loss: 1383.5249716979354
Epoch 1 step 523: training accuarcy: 0.6975
Epoch 1

 25%|██████████████████████████████████████                                                                                                                  | 2/8 [03:49<11:32, 115.46s/it]

Epoch: 2
Epoch 2 step 526: training loss: 1382.0775223145868
Epoch 2 step 527: training accuarcy: 0.7135
Epoch 2 step 527: training loss: 1381.8366147226334
Epoch 2 step 528: training accuarcy: 0.7035
Epoch 2 step 528: training loss: 1381.4339110246892
Epoch 2 step 529: training accuarcy: 0.724
Epoch 2 step 529: training loss: 1381.3559395621166
Epoch 2 step 530: training accuarcy: 0.7245
Epoch 2 step 530: training loss: 1381.7553624288194
Epoch 2 step 531: training accuarcy: 0.724
Epoch 2 step 531: training loss: 1382.7913779384473
Epoch 2 step 532: training accuarcy: 0.7095
Epoch 2 step 532: training loss: 1382.1608668271333
Epoch 2 step 533: training accuarcy: 0.7000000000000001
Epoch 2 step 533: training loss: 1382.5028539405237
Epoch 2 step 534: training accuarcy: 0.736
Epoch 2 step 534: training loss: 1381.686959127311
Epoch 2 step 535: training accuarcy: 0.7035
Epoch 2 step 535: training loss: 1382.1422925118645
Epoch 2 step 536: training accuarcy: 0.7020000000000001
Epoch 2 ste

Epoch 2 step 609: training loss: 1384.0077439191318
Epoch 2 step 610: training accuarcy: 0.681
Epoch 2 step 610: training loss: 1383.333358696333
Epoch 2 step 611: training accuarcy: 0.6890000000000001
Epoch 2 step 611: training loss: 1382.9604646548516
Epoch 2 step 612: training accuarcy: 0.6930000000000001
Epoch 2 step 612: training loss: 1382.1609066388241
Epoch 2 step 613: training accuarcy: 0.705
Epoch 2 step 613: training loss: 1382.6911006912296
Epoch 2 step 614: training accuarcy: 0.6995
Epoch 2 step 614: training loss: 1383.4066355362904
Epoch 2 step 615: training accuarcy: 0.707
Epoch 2 step 615: training loss: 1381.4360403452197
Epoch 2 step 616: training accuarcy: 0.7195
Epoch 2 step 616: training loss: 1382.7716758911672
Epoch 2 step 617: training accuarcy: 0.715
Epoch 2 step 617: training loss: 1382.7695450744252
Epoch 2 step 618: training accuarcy: 0.712
Epoch 2 step 618: training loss: 1383.9049502025598
Epoch 2 step 619: training accuarcy: 0.6980000000000001
Epoch 2 st

Epoch 2 step 693: training accuarcy: 0.684
Epoch 2 step 693: training loss: 1382.9571477016398
Epoch 2 step 694: training accuarcy: 0.6990000000000001
Epoch 2 step 694: training loss: 1383.61796920351
Epoch 2 step 695: training accuarcy: 0.7055
Epoch 2 step 695: training loss: 1384.1448688496243
Epoch 2 step 696: training accuarcy: 0.6930000000000001
Epoch 2 step 696: training loss: 1383.276184700827
Epoch 2 step 697: training accuarcy: 0.6925
Epoch 2 step 697: training loss: 1382.7589471041933
Epoch 2 step 698: training accuarcy: 0.6995
Epoch 2 step 698: training loss: 1382.4871790511083
Epoch 2 step 699: training accuarcy: 0.7000000000000001
Epoch 2 step 699: training loss: 1383.3348101030208
Epoch 2 step 700: training accuarcy: 0.6835
Epoch 2 step 700: training loss: 1382.8498780024174
Epoch 2 step 701: training accuarcy: 0.6895
Epoch 2 step 701: training loss: 1383.3917578101277
Epoch 2 step 702: training accuarcy: 0.6975
Epoch 2 step 702: training loss: 1382.619333216925
Epoch 2 s

Epoch 2 step 775: training accuarcy: 0.7075
Epoch 2 step 775: training loss: 1383.2530691823151
Epoch 2 step 776: training accuarcy: 0.7020000000000001
Epoch 2 step 776: training loss: 1383.3415074586235
Epoch 2 step 777: training accuarcy: 0.6960000000000001
Epoch 2 step 777: training loss: 1382.5645377528733
Epoch 2 step 778: training accuarcy: 0.7035
Epoch 2 step 778: training loss: 1383.3859154186994
Epoch 2 step 779: training accuarcy: 0.6910000000000001
Epoch 2 step 779: training loss: 1384.055620287605
Epoch 2 step 780: training accuarcy: 0.685
Epoch 2 step 780: training loss: 1383.277577422761
Epoch 2 step 781: training accuarcy: 0.6955
Epoch 2 step 781: training loss: 1382.9202428339286
Epoch 2 step 782: training accuarcy: 0.7020000000000001
Epoch 2 step 782: training loss: 1382.89904629041
Epoch 2 step 783: training accuarcy: 0.678
Epoch 2 step 783: training loss: 1383.291107787693
Epoch 2 step 784: training accuarcy: 0.6795
Epoch 2 step 784: training loss: 1382.7444881588337

 38%|█████████████████████████████████████████████████████████                                                                                               | 3/8 [05:42<09:34, 114.83s/it]

Epoch: 3
Epoch 3 step 789: training loss: 1382.6948344196605
Epoch 3 step 790: training accuarcy: 0.7185
Epoch 3 step 790: training loss: 1381.2702644229284
Epoch 3 step 791: training accuarcy: 0.7195
Epoch 3 step 791: training loss: 1382.443284264446
Epoch 3 step 792: training accuarcy: 0.7085
Epoch 3 step 792: training loss: 1382.2035946184014
Epoch 3 step 793: training accuarcy: 0.7020000000000001
Epoch 3 step 793: training loss: 1382.3513427074772
Epoch 3 step 794: training accuarcy: 0.7025
Epoch 3 step 794: training loss: 1380.8759247979915
Epoch 3 step 795: training accuarcy: 0.72
Epoch 3 step 795: training loss: 1381.8447831274518
Epoch 3 step 796: training accuarcy: 0.7085
Epoch 3 step 796: training loss: 1381.9371576360477
Epoch 3 step 797: training accuarcy: 0.7055
Epoch 3 step 797: training loss: 1381.8638093092452
Epoch 3 step 798: training accuarcy: 0.73
Epoch 3 step 798: training loss: 1381.8935691504855
Epoch 3 step 799: training accuarcy: 0.707
Epoch 3 step 799: trainin

Epoch 3 step 873: training loss: 1383.0455499420316
Epoch 3 step 874: training accuarcy: 0.6905
Epoch 3 step 874: training loss: 1384.7291653067205
Epoch 3 step 875: training accuarcy: 0.683
Epoch 3 step 875: training loss: 1382.9634338978026
Epoch 3 step 876: training accuarcy: 0.6885
Epoch 3 step 876: training loss: 1382.3797467458207
Epoch 3 step 877: training accuarcy: 0.7095
Epoch 3 step 877: training loss: 1382.240119717867
Epoch 3 step 878: training accuarcy: 0.6945
Epoch 3 step 878: training loss: 1382.7587815712109
Epoch 3 step 879: training accuarcy: 0.6985
Epoch 3 step 879: training loss: 1381.8477590733555
Epoch 3 step 880: training accuarcy: 0.6965
Epoch 3 step 880: training loss: 1382.4198697183351
Epoch 3 step 881: training accuarcy: 0.7025
Epoch 3 step 881: training loss: 1382.6321639297348
Epoch 3 step 882: training accuarcy: 0.7000000000000001
Epoch 3 step 882: training loss: 1382.6172944492027
Epoch 3 step 883: training accuarcy: 0.6990000000000001
Epoch 3 step 883: 

Epoch 3 step 957: training accuarcy: 0.704
Epoch 3 step 957: training loss: 1382.2913814021301
Epoch 3 step 958: training accuarcy: 0.7005
Epoch 3 step 958: training loss: 1383.4684908042232
Epoch 3 step 959: training accuarcy: 0.6905
Epoch 3 step 959: training loss: 1383.0728250134239
Epoch 3 step 960: training accuarcy: 0.7010000000000001
Epoch 3 step 960: training loss: 1382.8998016617154
Epoch 3 step 961: training accuarcy: 0.6895
Epoch 3 step 961: training loss: 1382.62635410225
Epoch 3 step 962: training accuarcy: 0.7105
Epoch 3 step 962: training loss: 1383.510197930131
Epoch 3 step 963: training accuarcy: 0.6920000000000001
Epoch 3 step 963: training loss: 1382.782339482078
Epoch 3 step 964: training accuarcy: 0.7015
Epoch 3 step 964: training loss: 1383.5770460562608
Epoch 3 step 965: training accuarcy: 0.687
Epoch 3 step 965: training loss: 1382.8944597063394
Epoch 3 step 966: training accuarcy: 0.7025
Epoch 3 step 966: training loss: 1382.318713068834
Epoch 3 step 967: train

Epoch 3 step 1038: training accuarcy: 0.6985
Epoch 3 step 1038: training loss: 1383.7750460755167
Epoch 3 step 1039: training accuarcy: 0.686
Epoch 3 step 1039: training loss: 1384.15248415367
Epoch 3 step 1040: training accuarcy: 0.665
Epoch 3 step 1040: training loss: 1383.3735926124186
Epoch 3 step 1041: training accuarcy: 0.6900000000000001
Epoch 3 step 1041: training loss: 1382.433210542612
Epoch 3 step 1042: training accuarcy: 0.686
Epoch 3 step 1042: training loss: 1382.2463133346428
Epoch 3 step 1043: training accuarcy: 0.6960000000000001
Epoch 3 step 1043: training loss: 1383.6066352128764
Epoch 3 step 1044: training accuarcy: 0.684
Epoch 3 step 1044: training loss: 1382.914305794032
Epoch 3 step 1045: training accuarcy: 0.7045
Epoch 3 step 1045: training loss: 1382.8207407805858
Epoch 3 step 1046: training accuarcy: 0.7035
Epoch 3 step 1046: training loss: 1383.5247245725898
Epoch 3 step 1047: training accuarcy: 0.6995
Epoch 3 step 1047: training loss: 1381.4452054903948
Epoc

 50%|████████████████████████████████████████████████████████████████████████████                                                                            | 4/8 [07:42<07:45, 116.33s/it]

Epoch: 4
Epoch 4 step 1052: training loss: 1382.5246919652498
Epoch 4 step 1053: training accuarcy: 0.71
Epoch 4 step 1053: training loss: 1381.8458729592346
Epoch 4 step 1054: training accuarcy: 0.708
Epoch 4 step 1054: training loss: 1383.148648274755
Epoch 4 step 1055: training accuarcy: 0.704
Epoch 4 step 1055: training loss: 1382.3012138236195
Epoch 4 step 1056: training accuarcy: 0.72
Epoch 4 step 1056: training loss: 1381.511025055811
Epoch 4 step 1057: training accuarcy: 0.717
Epoch 4 step 1057: training loss: 1382.7251593311412
Epoch 4 step 1058: training accuarcy: 0.71
Epoch 4 step 1058: training loss: 1382.137966299077
Epoch 4 step 1059: training accuarcy: 0.723
Epoch 4 step 1059: training loss: 1381.7830035196864
Epoch 4 step 1060: training accuarcy: 0.712
Epoch 4 step 1060: training loss: 1382.1192834601509
Epoch 4 step 1061: training accuarcy: 0.7035
Epoch 4 step 1061: training loss: 1382.2242559358194
Epoch 4 step 1062: training accuarcy: 0.708
Epoch 4 step 1062: trainin

Epoch 4 step 1135: training accuarcy: 0.6965
Epoch 4 step 1135: training loss: 1382.5509399401856
Epoch 4 step 1136: training accuarcy: 0.6955
Epoch 4 step 1136: training loss: 1382.303881698267
Epoch 4 step 1137: training accuarcy: 0.6965
Epoch 4 step 1137: training loss: 1382.4545808159153
Epoch 4 step 1138: training accuarcy: 0.7185
Epoch 4 step 1138: training loss: 1381.7943073951442
Epoch 4 step 1139: training accuarcy: 0.7025
Epoch 4 step 1139: training loss: 1381.9264066065105
Epoch 4 step 1140: training accuarcy: 0.712
Epoch 4 step 1140: training loss: 1383.2061433088438
Epoch 4 step 1141: training accuarcy: 0.704
Epoch 4 step 1141: training loss: 1382.2743686269353
Epoch 4 step 1142: training accuarcy: 0.7185
Epoch 4 step 1142: training loss: 1383.247236501556
Epoch 4 step 1143: training accuarcy: 0.686
Epoch 4 step 1143: training loss: 1382.0403790674904
Epoch 4 step 1144: training accuarcy: 0.7010000000000001
Epoch 4 step 1144: training loss: 1383.4319116190868
Epoch 4 step 

Epoch 4 step 1216: training accuarcy: 0.668
Epoch 4 step 1216: training loss: 1382.9617814476774
Epoch 4 step 1217: training accuarcy: 0.712
Epoch 4 step 1217: training loss: 1383.166450067319
Epoch 4 step 1218: training accuarcy: 0.7025
Epoch 4 step 1218: training loss: 1382.0339700890731
Epoch 4 step 1219: training accuarcy: 0.7030000000000001
Epoch 4 step 1219: training loss: 1382.6973158426463
Epoch 4 step 1220: training accuarcy: 0.6910000000000001
Epoch 4 step 1220: training loss: 1383.7232188561632
Epoch 4 step 1221: training accuarcy: 0.6920000000000001
Epoch 4 step 1221: training loss: 1381.9970947061893
Epoch 4 step 1222: training accuarcy: 0.714
Epoch 4 step 1222: training loss: 1383.2876036518117
Epoch 4 step 1223: training accuarcy: 0.6935
Epoch 4 step 1223: training loss: 1383.3480438914078
Epoch 4 step 1224: training accuarcy: 0.7055
Epoch 4 step 1224: training loss: 1383.440626057764
Epoch 4 step 1225: training accuarcy: 0.7015
Epoch 4 step 1225: training loss: 1383.394

Epoch 4 step 1297: training loss: 1383.4334950204632
Epoch 4 step 1298: training accuarcy: 0.679
Epoch 4 step 1298: training loss: 1382.6049478996752
Epoch 4 step 1299: training accuarcy: 0.6985
Epoch 4 step 1299: training loss: 1383.1202348315921
Epoch 4 step 1300: training accuarcy: 0.6940000000000001
Epoch 4 step 1300: training loss: 1383.5203515307292
Epoch 4 step 1301: training accuarcy: 0.681
Epoch 4 step 1301: training loss: 1383.1862973337197
Epoch 4 step 1302: training accuarcy: 0.708
Epoch 4 step 1302: training loss: 1382.9444791556332
Epoch 4 step 1303: training accuarcy: 0.684
Epoch 4 step 1303: training loss: 1382.371725197218
Epoch 4 step 1304: training accuarcy: 0.6975
Epoch 4 step 1304: training loss: 1382.606559640893
Epoch 4 step 1305: training accuarcy: 0.6990000000000001
Epoch 4 step 1305: training loss: 1382.8515706737464
Epoch 4 step 1306: training accuarcy: 0.7035
Epoch 4 step 1306: training loss: 1383.454628888206
Epoch 4 step 1307: training accuarcy: 0.6875
Epo

 62%|███████████████████████████████████████████████████████████████████████████████████████████████                                                         | 5/8 [09:51<06:00, 120.03s/it]

Epoch: 5
Epoch 5 step 1315: training loss: 1381.7430139295107
Epoch 5 step 1316: training accuarcy: 0.7095
Epoch 5 step 1316: training loss: 1382.0481881308806
Epoch 5 step 1317: training accuarcy: 0.7095
Epoch 5 step 1317: training loss: 1380.6654792173902
Epoch 5 step 1318: training accuarcy: 0.7185
Epoch 5 step 1318: training loss: 1382.1917287253682
Epoch 5 step 1319: training accuarcy: 0.6925
Epoch 5 step 1319: training loss: 1382.9747114238955
Epoch 5 step 1320: training accuarcy: 0.7030000000000001
Epoch 5 step 1320: training loss: 1381.605863538038
Epoch 5 step 1321: training accuarcy: 0.7265
Epoch 5 step 1321: training loss: 1382.7886336902134
Epoch 5 step 1322: training accuarcy: 0.717
Epoch 5 step 1322: training loss: 1382.2918614195526
Epoch 5 step 1323: training accuarcy: 0.7075
Epoch 5 step 1323: training loss: 1382.7890369344545
Epoch 5 step 1324: training accuarcy: 0.6970000000000001
Epoch 5 step 1324: training loss: 1381.0536386608583
Epoch 5 step 1325: training accuar

Epoch 5 step 1397: training accuarcy: 0.708
Epoch 5 step 1397: training loss: 1382.8126362361938
Epoch 5 step 1398: training accuarcy: 0.6990000000000001
Epoch 5 step 1398: training loss: 1382.391627564742
Epoch 5 step 1399: training accuarcy: 0.705
Epoch 5 step 1399: training loss: 1383.3915142571245
Epoch 5 step 1400: training accuarcy: 0.7045
Epoch 5 step 1400: training loss: 1382.441820889064
Epoch 5 step 1401: training accuarcy: 0.6920000000000001
Epoch 5 step 1401: training loss: 1383.5258328182952
Epoch 5 step 1402: training accuarcy: 0.7005
Epoch 5 step 1402: training loss: 1382.9377967390399
Epoch 5 step 1403: training accuarcy: 0.6935
Epoch 5 step 1403: training loss: 1381.9188440805449
Epoch 5 step 1404: training accuarcy: 0.712
Epoch 5 step 1404: training loss: 1382.8926918317238
Epoch 5 step 1405: training accuarcy: 0.6970000000000001
Epoch 5 step 1405: training loss: 1383.3602291657726
Epoch 5 step 1406: training accuarcy: 0.6945
Epoch 5 step 1406: training loss: 1383.200

Epoch 5 step 1478: training loss: 1382.9995514607767
Epoch 5 step 1479: training accuarcy: 0.6930000000000001
Epoch 5 step 1479: training loss: 1382.914911136799
Epoch 5 step 1480: training accuarcy: 0.6815
Epoch 5 step 1480: training loss: 1382.897355626293
Epoch 5 step 1481: training accuarcy: 0.6960000000000001
Epoch 5 step 1481: training loss: 1383.0658116648556
Epoch 5 step 1482: training accuarcy: 0.6980000000000001
Epoch 5 step 1482: training loss: 1382.016961750663
Epoch 5 step 1483: training accuarcy: 0.6960000000000001
Epoch 5 step 1483: training loss: 1383.6033826369344
Epoch 5 step 1484: training accuarcy: 0.6865
Epoch 5 step 1484: training loss: 1382.7465324346747
Epoch 5 step 1485: training accuarcy: 0.7010000000000001
Epoch 5 step 1485: training loss: 1382.7022507684408
Epoch 5 step 1486: training accuarcy: 0.7055
Epoch 5 step 1486: training loss: 1382.9449926222337
Epoch 5 step 1487: training accuarcy: 0.6895
Epoch 5 step 1487: training loss: 1383.5467038109782
Epoch 5 

Epoch 5 step 1558: training accuarcy: 0.6980000000000001
Epoch 5 step 1558: training loss: 1383.0066986680454
Epoch 5 step 1559: training accuarcy: 0.6915
Epoch 5 step 1559: training loss: 1382.1709518592154
Epoch 5 step 1560: training accuarcy: 0.717
Epoch 5 step 1560: training loss: 1383.010156249655
Epoch 5 step 1561: training accuarcy: 0.6900000000000001
Epoch 5 step 1561: training loss: 1383.0715730022264
Epoch 5 step 1562: training accuarcy: 0.6880000000000001
Epoch 5 step 1562: training loss: 1382.89232141381
Epoch 5 step 1563: training accuarcy: 0.6935
Epoch 5 step 1563: training loss: 1382.9613326499905
Epoch 5 step 1564: training accuarcy: 0.6880000000000001
Epoch 5 step 1564: training loss: 1382.5014619569124
Epoch 5 step 1565: training accuarcy: 0.6940000000000001
Epoch 5 step 1565: training loss: 1382.7205394417886
Epoch 5 step 1566: training accuarcy: 0.711
Epoch 5 step 1566: training loss: 1382.7197085007454
Epoch 5 step 1567: training accuarcy: 0.7135
Epoch 5 step 1567:

 75%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████                                      | 6/8 [11:58<04:04, 122.24s/it]

Epoch: 6
Epoch 6 step 1578: training loss: 1381.9675770691683
Epoch 6 step 1579: training accuarcy: 0.712
Epoch 6 step 1579: training loss: 1382.0272766838964
Epoch 6 step 1580: training accuarcy: 0.715
Epoch 6 step 1580: training loss: 1381.6563032627062
Epoch 6 step 1581: training accuarcy: 0.723
Epoch 6 step 1581: training loss: 1381.134598980179
Epoch 6 step 1582: training accuarcy: 0.724
Epoch 6 step 1582: training loss: 1382.3083225159285
Epoch 6 step 1583: training accuarcy: 0.7145
Epoch 6 step 1583: training loss: 1380.7826923964485
Epoch 6 step 1584: training accuarcy: 0.728
Epoch 6 step 1584: training loss: 1381.892443708029
Epoch 6 step 1585: training accuarcy: 0.7385
Epoch 6 step 1585: training loss: 1381.9904319996722
Epoch 6 step 1586: training accuarcy: 0.7115
Epoch 6 step 1586: training loss: 1382.297759629386
Epoch 6 step 1587: training accuarcy: 0.7255
Epoch 6 step 1587: training loss: 1381.7433709642182
Epoch 6 step 1588: training accuarcy: 0.713
Epoch 6 step 1588: t

Epoch 6 step 1660: training loss: 1382.513903374078
Epoch 6 step 1661: training accuarcy: 0.706
Epoch 6 step 1661: training loss: 1382.1241602869293
Epoch 6 step 1662: training accuarcy: 0.7015
Epoch 6 step 1662: training loss: 1382.8935729634572
Epoch 6 step 1663: training accuarcy: 0.7075
Epoch 6 step 1663: training loss: 1383.4001257190487
Epoch 6 step 1664: training accuarcy: 0.7035
Epoch 6 step 1664: training loss: 1383.573750430798
Epoch 6 step 1665: training accuarcy: 0.6990000000000001
Epoch 6 step 1665: training loss: 1383.2886481336964
Epoch 6 step 1666: training accuarcy: 0.6980000000000001
Epoch 6 step 1666: training loss: 1383.1548836876104
Epoch 6 step 1667: training accuarcy: 0.6960000000000001
Epoch 6 step 1667: training loss: 1381.9606028417168
Epoch 6 step 1668: training accuarcy: 0.7045
Epoch 6 step 1668: training loss: 1383.0989607509284
Epoch 6 step 1669: training accuarcy: 0.6995
Epoch 6 step 1669: training loss: 1382.4265137468155
Epoch 6 step 1670: training accu

Epoch 6 step 1741: training accuarcy: 0.6955
Epoch 6 step 1741: training loss: 1382.666794818975
Epoch 6 step 1742: training accuarcy: 0.7000000000000001
Epoch 6 step 1742: training loss: 1382.29850806044
Epoch 6 step 1743: training accuarcy: 0.7045
Epoch 6 step 1743: training loss: 1383.816248352409
Epoch 6 step 1744: training accuarcy: 0.6960000000000001
Epoch 6 step 1744: training loss: 1383.292257358835
Epoch 6 step 1745: training accuarcy: 0.6920000000000001
Epoch 6 step 1745: training loss: 1382.367831435093
Epoch 6 step 1746: training accuarcy: 0.711
Epoch 6 step 1746: training loss: 1383.8754677500765
Epoch 6 step 1747: training accuarcy: 0.6845
Epoch 6 step 1747: training loss: 1382.769616835386
Epoch 6 step 1748: training accuarcy: 0.7030000000000001
Epoch 6 step 1748: training loss: 1382.6848318598868
Epoch 6 step 1749: training accuarcy: 0.6845
Epoch 6 step 1749: training loss: 1383.3193289278927
Epoch 6 step 1750: training accuarcy: 0.6890000000000001
Epoch 6 step 1750: tr

Epoch 6 step 1821: training loss: 1382.4626598323562
Epoch 6 step 1822: training accuarcy: 0.6990000000000001
Epoch 6 step 1822: training loss: 1383.444971459152
Epoch 6 step 1823: training accuarcy: 0.6945
Epoch 6 step 1823: training loss: 1383.5062680591793
Epoch 6 step 1824: training accuarcy: 0.6900000000000001
Epoch 6 step 1824: training loss: 1382.4972642726038
Epoch 6 step 1825: training accuarcy: 0.6930000000000001
Epoch 6 step 1825: training loss: 1382.6726609411392
Epoch 6 step 1826: training accuarcy: 0.6960000000000001
Epoch 6 step 1826: training loss: 1381.5563306478848
Epoch 6 step 1827: training accuarcy: 0.7010000000000001
Epoch 6 step 1827: training loss: 1381.7114711750353
Epoch 6 step 1828: training accuarcy: 0.7020000000000001
Epoch 6 step 1828: training loss: 1382.963432468188
Epoch 6 step 1829: training accuarcy: 0.7085
Epoch 6 step 1829: training loss: 1382.7810744554927
Epoch 6 step 1830: training accuarcy: 0.7030000000000001
Epoch 6 step 1830: training loss: 13

 88%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████                   | 7/8 [14:03<02:03, 123.02s/it]

Epoch: 7
Epoch 7 step 1841: training loss: 1382.0603360998941
Epoch 7 step 1842: training accuarcy: 0.7085
Epoch 7 step 1842: training loss: 1381.9663085083093
Epoch 7 step 1843: training accuarcy: 0.7030000000000001
Epoch 7 step 1843: training loss: 1382.155464600916
Epoch 7 step 1844: training accuarcy: 0.7030000000000001
Epoch 7 step 1844: training loss: 1381.8843937109814
Epoch 7 step 1845: training accuarcy: 0.713
Epoch 7 step 1845: training loss: 1383.6670187086052
Epoch 7 step 1846: training accuarcy: 0.6920000000000001
Epoch 7 step 1846: training loss: 1381.3106253079434
Epoch 7 step 1847: training accuarcy: 0.715
Epoch 7 step 1847: training loss: 1381.496890954251
Epoch 7 step 1848: training accuarcy: 0.7135
Epoch 7 step 1848: training loss: 1382.0257215977017
Epoch 7 step 1849: training accuarcy: 0.7145
Epoch 7 step 1849: training loss: 1382.4808050050588
Epoch 7 step 1850: training accuarcy: 0.7125
Epoch 7 step 1850: training loss: 1381.6245515092833
Epoch 7 step 1851: train

Epoch 7 step 1923: training loss: 1383.0912891277162
Epoch 7 step 1924: training accuarcy: 0.706
Epoch 7 step 1924: training loss: 1382.6613293629712
Epoch 7 step 1925: training accuarcy: 0.6970000000000001
Epoch 7 step 1925: training loss: 1383.015189897159
Epoch 7 step 1926: training accuarcy: 0.6915
Epoch 7 step 1926: training loss: 1383.0345461815352
Epoch 7 step 1927: training accuarcy: 0.682
Epoch 7 step 1927: training loss: 1382.1299860127394
Epoch 7 step 1928: training accuarcy: 0.704
Epoch 7 step 1928: training loss: 1382.3489006138675
Epoch 7 step 1929: training accuarcy: 0.7065
Epoch 7 step 1929: training loss: 1381.8290284077368
Epoch 7 step 1930: training accuarcy: 0.713
Epoch 7 step 1930: training loss: 1382.6977309232643
Epoch 7 step 1931: training accuarcy: 0.7015
Epoch 7 step 1931: training loss: 1383.862271691608
Epoch 7 step 1932: training accuarcy: 0.6920000000000001
Epoch 7 step 1932: training loss: 1383.1418919672076
Epoch 7 step 1933: training accuarcy: 0.6960000

Epoch 7 step 2005: training accuarcy: 0.677
Epoch 7 step 2005: training loss: 1382.4426488788642
Epoch 7 step 2006: training accuarcy: 0.7115
Epoch 7 step 2006: training loss: 1384.0143155934102
Epoch 7 step 2007: training accuarcy: 0.6895
Epoch 7 step 2007: training loss: 1381.786588647689
Epoch 7 step 2008: training accuarcy: 0.7020000000000001
Epoch 7 step 2008: training loss: 1382.724263304761
Epoch 7 step 2009: training accuarcy: 0.6945
Epoch 7 step 2009: training loss: 1383.3853444019803
Epoch 7 step 2010: training accuarcy: 0.6990000000000001
Epoch 7 step 2010: training loss: 1382.1493924876793
Epoch 7 step 2011: training accuarcy: 0.7115
Epoch 7 step 2011: training loss: 1382.429153467883
Epoch 7 step 2012: training accuarcy: 0.711
Epoch 7 step 2012: training loss: 1381.6580251088676
Epoch 7 step 2013: training accuarcy: 0.707
Epoch 7 step 2013: training loss: 1382.0272257235463
Epoch 7 step 2014: training accuarcy: 0.715
Epoch 7 step 2014: training loss: 1383.463280344703
Epoc

Epoch 7 step 2086: training accuarcy: 0.683
Epoch 7 step 2086: training loss: 1383.475166157004
Epoch 7 step 2087: training accuarcy: 0.6910000000000001
Epoch 7 step 2087: training loss: 1382.605241274103
Epoch 7 step 2088: training accuarcy: 0.7065
Epoch 7 step 2088: training loss: 1382.853922574848
Epoch 7 step 2089: training accuarcy: 0.7135
Epoch 7 step 2089: training loss: 1382.7739855277407
Epoch 7 step 2090: training accuarcy: 0.7035
Epoch 7 step 2090: training loss: 1381.6857171824581
Epoch 7 step 2091: training accuarcy: 0.7175
Epoch 7 step 2091: training loss: 1382.2089479325002
Epoch 7 step 2092: training accuarcy: 0.6975
Epoch 7 step 2092: training loss: 1384.6542699408378
Epoch 7 step 2093: training accuarcy: 0.679
Epoch 7 step 2093: training loss: 1382.7846881925848
Epoch 7 step 2094: training accuarcy: 0.7015
Epoch 7 step 2094: training loss: 1382.872421434367
Epoch 7 step 2095: training accuarcy: 0.6940000000000001
Epoch 7 step 2095: training loss: 1383.661988762389
Epo

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 8/8 [16:07<00:00, 123.26s/it]


In [32]:
# Drop this notebook's reference to the PRME model, then ask PyTorch to release
# cached, currently-unused CUDA memory before the next model is built.
# NOTE(review): the memory can only be reclaimed if nothing else (e.g. a learner
# or optimizer created in an earlier cell) still references the model's
# parameters — confirm against the preceding cells.
del prme_model
T.cuda.empty_cache()

### Train Trans FM Model

In [33]:
# Instantiate the TransFM model from the hyper-parameters configured earlier in
# the notebook; the bare name on the last line shows the module repr as output.
trans_model = TorchTransFM(
    feature_dim=feat_dim,
    num_dim=NUM_DIM,
    init_mean=INIT_MEAN,
)
trans_model

TorchTransFM()

In [34]:
# Adam optimizer over the TransFM parameters, paired with a step-wise
# learning-rate decay (multiply the LR by DECAY_GAMMA every DECAY_FREQ
# scheduler steps).
# The spelling `schedular` is kept as-is because the learner cell reads it.
adam_opt = optim.Adam(
    trans_model.parameters(),
    lr=LEARNING_RATE,
)
schedular = optim.lr_scheduler.StepLR(
    adam_opt,
    step_size=DECAY_FREQ,
    gamma=DECAY_GAMMA,
)

In [42]:
# Bundle the model, optimizer, LR scheduler and the dataset object `db` into a
# learner; the trailing bare name displays the learner as the cell output.
trans_learner = FMLearner(
    trans_model,
    adam_opt,
    schedular,
    db,
)
trans_learner

<models.fm_learner.FMLearner at 0x1f2000ef860>

In [36]:
# Run 8 training epochs with the TransFM loss callback, passing the directory
# returned by get_log_dir('topcoder', 'trans') as the log location.
trans_learner.fit(
    epoch=8,
    loss_callback=trans_loss_callback,
    log_dir=get_log_dir('topcoder', 'trans'),
)

  0%|                                                                                                                                                                 | 0/8 [00:00<?, ?it/s]

Epoch: 0
Epoch 0 step 0: training loss: 37807.26401193977
Epoch 0 step 1: training accuarcy: 0.4955
Epoch 0 step 1: training loss: 36749.778786841816
Epoch 0 step 2: training accuarcy: 0.504
Epoch 0 step 2: training loss: 35706.60817088014
Epoch 0 step 3: training accuarcy: 0.5
Epoch 0 step 3: training loss: 34674.50367937326
Epoch 0 step 4: training accuarcy: 0.5275
Epoch 0 step 4: training loss: 33686.3987004469
Epoch 0 step 5: training accuarcy: 0.5125
Epoch 0 step 5: training loss: 32706.96820491349
Epoch 0 step 6: training accuarcy: 0.5165
Epoch 0 step 6: training loss: 31751.5109393562
Epoch 0 step 7: training accuarcy: 0.505
Epoch 0 step 7: training loss: 30820.65553823673
Epoch 0 step 8: training accuarcy: 0.524
Epoch 0 step 8: training loss: 29908.863742371064
Epoch 0 step 9: training accuarcy: 0.522
Epoch 0 step 9: training loss: 29012.36624609563
Epoch 0 step 10: training accuarcy: 0.529
Epoch 0 step 10: training loss: 28151.494881656392
Epoch 0 step 11: training accuarcy: 0

Epoch 0 step 86: training loss: 2608.6593532551665
Epoch 0 step 87: training accuarcy: 0.7025
Epoch 0 step 87: training loss: 2544.652764805356
Epoch 0 step 88: training accuarcy: 0.6880000000000001
Epoch 0 step 88: training loss: 2468.6590752649295
Epoch 0 step 89: training accuarcy: 0.71
Epoch 0 step 89: training loss: 2423.153420795797
Epoch 0 step 90: training accuarcy: 0.6975
Epoch 0 step 90: training loss: 2361.7523445788593
Epoch 0 step 91: training accuarcy: 0.713
Epoch 0 step 91: training loss: 2310.594598037431
Epoch 0 step 92: training accuarcy: 0.6940000000000001
Epoch 0 step 92: training loss: 2254.93353079578
Epoch 0 step 93: training accuarcy: 0.712
Epoch 0 step 93: training loss: 2206.2737715561316
Epoch 0 step 94: training accuarcy: 0.7135
Epoch 0 step 94: training loss: 2179.5454209468944
Epoch 0 step 95: training accuarcy: 0.684
Epoch 0 step 95: training loss: 2129.032255605367
Epoch 0 step 96: training accuarcy: 0.6985
Epoch 0 step 96: training loss: 2075.8736270671

Epoch 0 step 172: training accuarcy: 0.7435
Epoch 0 step 172: training loss: 1142.4687222205114
Epoch 0 step 173: training accuarcy: 0.7425
Epoch 0 step 173: training loss: 1148.4498302713594
Epoch 0 step 174: training accuarcy: 0.748
Epoch 0 step 174: training loss: 1126.3964042349307
Epoch 0 step 175: training accuarcy: 0.751
Epoch 0 step 175: training loss: 1175.33577255694
Epoch 0 step 176: training accuarcy: 0.7195
Epoch 0 step 176: training loss: 1150.6492550502567
Epoch 0 step 177: training accuarcy: 0.738
Epoch 0 step 177: training loss: 1154.2970654569183
Epoch 0 step 178: training accuarcy: 0.751
Epoch 0 step 178: training loss: 1132.282888711028
Epoch 0 step 179: training accuarcy: 0.7485
Epoch 0 step 179: training loss: 1129.9585267991922
Epoch 0 step 180: training accuarcy: 0.755
Epoch 0 step 180: training loss: 1151.8311434767393
Epoch 0 step 181: training accuarcy: 0.737
Epoch 0 step 181: training loss: 1144.1199802066885
Epoch 0 step 182: training accuarcy: 0.7335
Epoch

Epoch 0 step 255: training loss: 1084.4392297650447
Epoch 0 step 256: training accuarcy: 0.751
Epoch 0 step 256: training loss: 1064.7772086371895
Epoch 0 step 257: training accuarcy: 0.7495
Epoch 0 step 257: training loss: 1078.0242551238125
Epoch 0 step 258: training accuarcy: 0.7555000000000001
Epoch 0 step 258: training loss: 1080.5240449647833
Epoch 0 step 259: training accuarcy: 0.7445
Epoch 0 step 259: training loss: 1070.4557052705234
Epoch 0 step 260: training accuarcy: 0.76
Epoch 0 step 260: training loss: 1051.8578109743326
Epoch 0 step 261: training accuarcy: 0.7795
Epoch 0 step 261: training loss: 1052.9890355081523
Epoch 0 step 262: training accuarcy: 0.7675000000000001
Epoch 0 step 262: training loss: 419.62126561278643
Epoch 0 step 263: training accuarcy: 0.7769230769230769
Epoch 0: train loss 5223.848272861555, train accuarcy 0.6741687059402466
Epoch 0: valid loss 1091.9839639880483, valid accuarcy 0.7284212708473206


 12%|███████████████████                                                                                                                                     | 1/8 [02:13<15:37, 133.97s/it]

Epoch: 1
Epoch 1 step 263: training loss: 968.1247141387189
Epoch 1 step 264: training accuarcy: 0.8155
Epoch 1 step 264: training loss: 974.9155207398045
Epoch 1 step 265: training accuarcy: 0.799
Epoch 1 step 265: training loss: 968.9799277918569
Epoch 1 step 266: training accuarcy: 0.8115
Epoch 1 step 266: training loss: 969.3743419102765
Epoch 1 step 267: training accuarcy: 0.8095
Epoch 1 step 267: training loss: 975.4692887293817
Epoch 1 step 268: training accuarcy: 0.802
Epoch 1 step 268: training loss: 986.443471727153
Epoch 1 step 269: training accuarcy: 0.801
Epoch 1 step 269: training loss: 965.1668782718472
Epoch 1 step 270: training accuarcy: 0.809
Epoch 1 step 270: training loss: 978.3685398036984
Epoch 1 step 271: training accuarcy: 0.7915
Epoch 1 step 271: training loss: 977.2293235825683
Epoch 1 step 272: training accuarcy: 0.799
Epoch 1 step 272: training loss: 976.9488489259388
Epoch 1 step 273: training accuarcy: 0.798
Epoch 1 step 273: training loss: 968.67655195534

Epoch 1 step 349: training accuarcy: 0.7975
Epoch 1 step 349: training loss: 950.8717818009897
Epoch 1 step 350: training accuarcy: 0.798
Epoch 1 step 350: training loss: 959.7410874457715
Epoch 1 step 351: training accuarcy: 0.8015
Epoch 1 step 351: training loss: 939.7930656540661
Epoch 1 step 352: training accuarcy: 0.7995
Epoch 1 step 352: training loss: 928.928823646394
Epoch 1 step 353: training accuarcy: 0.809
Epoch 1 step 353: training loss: 940.3219438879934
Epoch 1 step 354: training accuarcy: 0.809
Epoch 1 step 354: training loss: 940.3015358129531
Epoch 1 step 355: training accuarcy: 0.8025
Epoch 1 step 355: training loss: 968.1069796320263
Epoch 1 step 356: training accuarcy: 0.788
Epoch 1 step 356: training loss: 937.4825802975965
Epoch 1 step 357: training accuarcy: 0.8130000000000001
Epoch 1 step 357: training loss: 955.3890597764367
Epoch 1 step 358: training accuarcy: 0.7975
Epoch 1 step 358: training loss: 944.1715121254935
Epoch 1 step 359: training accuarcy: 0.808


Epoch 1 step 434: training loss: 915.6342369499238
Epoch 1 step 435: training accuarcy: 0.805
Epoch 1 step 435: training loss: 885.4818930078619
Epoch 1 step 436: training accuarcy: 0.8295
Epoch 1 step 436: training loss: 921.8968032519256
Epoch 1 step 437: training accuarcy: 0.8
Epoch 1 step 437: training loss: 926.9152894747033
Epoch 1 step 438: training accuarcy: 0.8055
Epoch 1 step 438: training loss: 880.693478972882
Epoch 1 step 439: training accuarcy: 0.8240000000000001
Epoch 1 step 439: training loss: 870.7242582027243
Epoch 1 step 440: training accuarcy: 0.8300000000000001
Epoch 1 step 440: training loss: 932.6479720911238
Epoch 1 step 441: training accuarcy: 0.805
Epoch 1 step 441: training loss: 930.8609398827883
Epoch 1 step 442: training accuarcy: 0.79
Epoch 1 step 442: training loss: 902.011950630336
Epoch 1 step 443: training accuarcy: 0.8145
Epoch 1 step 443: training loss: 910.8092236093823
Epoch 1 step 444: training accuarcy: 0.8115
Epoch 1 step 444: training loss: 90

Epoch 1 step 518: training accuarcy: 0.809
Epoch 1 step 518: training loss: 847.6455863895205
Epoch 1 step 519: training accuarcy: 0.834
Epoch 1 step 519: training loss: 879.2882706613302
Epoch 1 step 520: training accuarcy: 0.8160000000000001
Epoch 1 step 520: training loss: 881.0519361443079
Epoch 1 step 521: training accuarcy: 0.8160000000000001
Epoch 1 step 521: training loss: 882.7376274444861
Epoch 1 step 522: training accuarcy: 0.8290000000000001
Epoch 1 step 522: training loss: 884.1038942775203
Epoch 1 step 523: training accuarcy: 0.8095
Epoch 1 step 523: training loss: 859.9500563503879
Epoch 1 step 524: training accuarcy: 0.8230000000000001
Epoch 1 step 524: training loss: 921.6668859603786
Epoch 1 step 525: training accuarcy: 0.7945
Epoch 1 step 525: training loss: 366.212572380409
Epoch 1 step 526: training accuarcy: 0.808974358974359
Epoch 1: train loss 926.5457459866907, train accuarcy 0.780495822429657
Epoch 1: valid loss 1030.1578735028932, valid accuarcy 0.75075805187

 25%|██████████████████████████████████████                                                                                                                  | 2/8 [04:27<13:23, 133.85s/it]

Epoch: 2
Epoch 2 step 526: training loss: 796.1800074425452
Epoch 2 step 527: training accuarcy: 0.85
Epoch 2 step 527: training loss: 812.4522747391758
Epoch 2 step 528: training accuarcy: 0.847
Epoch 2 step 528: training loss: 780.7816190859206
Epoch 2 step 529: training accuarcy: 0.854
Epoch 2 step 529: training loss: 776.5738854869907
Epoch 2 step 530: training accuarcy: 0.8525
Epoch 2 step 530: training loss: 820.261246135934
Epoch 2 step 531: training accuarcy: 0.8415
Epoch 2 step 531: training loss: 796.089065489707
Epoch 2 step 532: training accuarcy: 0.8455
Epoch 2 step 532: training loss: 807.46911121999
Epoch 2 step 533: training accuarcy: 0.85
Epoch 2 step 533: training loss: 804.490586968923
Epoch 2 step 534: training accuarcy: 0.8535
Epoch 2 step 534: training loss: 783.8217581882924
Epoch 2 step 535: training accuarcy: 0.8655
Epoch 2 step 535: training loss: 796.2572558360356
Epoch 2 step 536: training accuarcy: 0.846
Epoch 2 step 536: training loss: 797.5429647445313
Ep

Epoch 2 step 613: training accuarcy: 0.852
Epoch 2 step 613: training loss: 774.1487747745001
Epoch 2 step 614: training accuarcy: 0.859
Epoch 2 step 614: training loss: 749.2056961632467
Epoch 2 step 615: training accuarcy: 0.8595
Epoch 2 step 615: training loss: 782.1607213728528
Epoch 2 step 616: training accuarcy: 0.8575
Epoch 2 step 616: training loss: 792.8569721290753
Epoch 2 step 617: training accuarcy: 0.8465
Epoch 2 step 617: training loss: 777.4729972415197
Epoch 2 step 618: training accuarcy: 0.851
Epoch 2 step 618: training loss: 760.3674300183445
Epoch 2 step 619: training accuarcy: 0.859
Epoch 2 step 619: training loss: 787.8744260398984
Epoch 2 step 620: training accuarcy: 0.845
Epoch 2 step 620: training loss: 774.7002469387361
Epoch 2 step 621: training accuarcy: 0.8615
Epoch 2 step 621: training loss: 779.4076274004615
Epoch 2 step 622: training accuarcy: 0.8545
Epoch 2 step 622: training loss: 776.9889290073825
Epoch 2 step 623: training accuarcy: 0.8525
Epoch 2 ste

Epoch 2 step 699: training loss: 743.8939994949307
Epoch 2 step 700: training accuarcy: 0.859
Epoch 2 step 700: training loss: 767.8181413563457
Epoch 2 step 701: training accuarcy: 0.845
Epoch 2 step 701: training loss: 756.4298216116127
Epoch 2 step 702: training accuarcy: 0.853
Epoch 2 step 702: training loss: 770.4160470194092
Epoch 2 step 703: training accuarcy: 0.8555
Epoch 2 step 703: training loss: 743.8601747379605
Epoch 2 step 704: training accuarcy: 0.8615
Epoch 2 step 704: training loss: 738.7535914079012
Epoch 2 step 705: training accuarcy: 0.8685
Epoch 2 step 705: training loss: 744.9842776048594
Epoch 2 step 706: training accuarcy: 0.8625
Epoch 2 step 706: training loss: 739.8717323576851
Epoch 2 step 707: training accuarcy: 0.8685
Epoch 2 step 707: training loss: 731.1224802882629
Epoch 2 step 708: training accuarcy: 0.8585
Epoch 2 step 708: training loss: 743.7190371733068
Epoch 2 step 709: training accuarcy: 0.852
Epoch 2 step 709: training loss: 747.0002384990096
Epo

Epoch 2 step 786: training accuarcy: 0.8635
Epoch 2 step 786: training loss: 705.8044547701887
Epoch 2 step 787: training accuarcy: 0.875
Epoch 2 step 787: training loss: 745.0177152427982
Epoch 2 step 788: training accuarcy: 0.8595
Epoch 2 step 788: training loss: 294.58378716480166
Epoch 2 step 789: training accuarcy: 0.8692307692307693
Epoch 2: train loss 762.3531646269417, train accuarcy 0.8305734395980835
Epoch 2: valid loss 989.3576875305267, valid accuarcy 0.761976957321167


 38%|█████████████████████████████████████████████████████████                                                                                               | 3/8 [06:41<11:09, 133.83s/it]

Epoch: 3
Epoch 3 step 789: training loss: 662.8766847026562
Epoch 3 step 790: training accuarcy: 0.8915000000000001
Epoch 3 step 790: training loss: 641.7494012508415
Epoch 3 step 791: training accuarcy: 0.897
Epoch 3 step 791: training loss: 653.0795543581189
Epoch 3 step 792: training accuarcy: 0.89
Epoch 3 step 792: training loss: 641.8297138849573
Epoch 3 step 793: training accuarcy: 0.8995
Epoch 3 step 793: training loss: 652.5418764175977
Epoch 3 step 794: training accuarcy: 0.8835000000000001
Epoch 3 step 794: training loss: 615.5849772180482
Epoch 3 step 795: training accuarcy: 0.909
Epoch 3 step 795: training loss: 636.9408397253416
Epoch 3 step 796: training accuarcy: 0.9035
Epoch 3 step 796: training loss: 657.5432947642714
Epoch 3 step 797: training accuarcy: 0.893
Epoch 3 step 797: training loss: 655.2370914042366
Epoch 3 step 798: training accuarcy: 0.8915000000000001
Epoch 3 step 798: training loss: 652.8417217789472
Epoch 3 step 799: training accuarcy: 0.886
Epoch 3 ste

Epoch 3 step 873: training loss: 631.7020458312045
Epoch 3 step 874: training accuarcy: 0.898
Epoch 3 step 874: training loss: 614.8507439320256
Epoch 3 step 875: training accuarcy: 0.904
Epoch 3 step 875: training loss: 610.3429885348747
Epoch 3 step 876: training accuarcy: 0.9055
Epoch 3 step 876: training loss: 640.9670313974943
Epoch 3 step 877: training accuarcy: 0.899
Epoch 3 step 877: training loss: 594.5607772476968
Epoch 3 step 878: training accuarcy: 0.9145
Epoch 3 step 878: training loss: 633.6067317804761
Epoch 3 step 879: training accuarcy: 0.886
Epoch 3 step 879: training loss: 617.5884767642145
Epoch 3 step 880: training accuarcy: 0.9015
Epoch 3 step 880: training loss: 617.2515007800687
Epoch 3 step 881: training accuarcy: 0.903
Epoch 3 step 881: training loss: 625.2582513765694
Epoch 3 step 882: training accuarcy: 0.899
Epoch 3 step 882: training loss: 632.0769984484552
Epoch 3 step 883: training accuarcy: 0.889
Epoch 3 step 883: training loss: 629.4311883065981
Epoch 

Epoch 3 step 960: training accuarcy: 0.887
Epoch 3 step 960: training loss: 613.5972854995517
Epoch 3 step 961: training accuarcy: 0.903
Epoch 3 step 961: training loss: 592.151937238631
Epoch 3 step 962: training accuarcy: 0.914
Epoch 3 step 962: training loss: 597.8531012286089
Epoch 3 step 963: training accuarcy: 0.91
Epoch 3 step 963: training loss: 586.7394798985896
Epoch 3 step 964: training accuarcy: 0.909
Epoch 3 step 964: training loss: 597.3925989849918
Epoch 3 step 965: training accuarcy: 0.906
Epoch 3 step 965: training loss: 626.650744639278
Epoch 3 step 966: training accuarcy: 0.89
Epoch 3 step 966: training loss: 602.5752597330887
Epoch 3 step 967: training accuarcy: 0.909
Epoch 3 step 967: training loss: 587.6273881538823
Epoch 3 step 968: training accuarcy: 0.9115
Epoch 3 step 968: training loss: 581.9788915852397
Epoch 3 step 969: training accuarcy: 0.9095
Epoch 3 step 969: training loss: 597.8279681108854
Epoch 3 step 970: training accuarcy: 0.905
Epoch 3 step 970: t

Epoch 3 step 1045: training loss: 563.8318709796129
Epoch 3 step 1046: training accuarcy: 0.915
Epoch 3 step 1046: training loss: 605.913150342893
Epoch 3 step 1047: training accuarcy: 0.901
Epoch 3 step 1047: training loss: 572.8340791039749
Epoch 3 step 1048: training accuarcy: 0.913
Epoch 3 step 1048: training loss: 565.5807341276164
Epoch 3 step 1049: training accuarcy: 0.915
Epoch 3 step 1049: training loss: 585.5508826188516
Epoch 3 step 1050: training accuarcy: 0.9105
Epoch 3 step 1050: training loss: 570.3915620121885
Epoch 3 step 1051: training accuarcy: 0.9155
Epoch 3 step 1051: training loss: 233.66633421860715
Epoch 3 step 1052: training accuarcy: 0.9166666666666666
Epoch 3: train loss 610.4419720792491, train accuarcy 0.8780782222747803
Epoch 3: valid loss 924.4322790288536, valid accuarcy 0.7848190665245056


 50%|████████████████████████████████████████████████████████████████████████████                                                                            | 4/8 [08:55<08:55, 133.83s/it]

Epoch: 4
Epoch 4 step 1052: training loss: 492.93881175929664
Epoch 4 step 1053: training accuarcy: 0.9400000000000001
Epoch 4 step 1053: training loss: 490.8071296319089
Epoch 4 step 1054: training accuarcy: 0.9420000000000001
Epoch 4 step 1054: training loss: 487.3139157325017
Epoch 4 step 1055: training accuarcy: 0.9390000000000001
Epoch 4 step 1055: training loss: 492.41937599976586
Epoch 4 step 1056: training accuarcy: 0.9415
Epoch 4 step 1056: training loss: 507.1195418844647
Epoch 4 step 1057: training accuarcy: 0.9315
Epoch 4 step 1057: training loss: 502.2970678688273
Epoch 4 step 1058: training accuarcy: 0.9345
Epoch 4 step 1058: training loss: 483.9729111697408
Epoch 4 step 1059: training accuarcy: 0.9455
Epoch 4 step 1059: training loss: 495.1825056675923
Epoch 4 step 1060: training accuarcy: 0.933
Epoch 4 step 1060: training loss: 519.3698487515812
Epoch 4 step 1061: training accuarcy: 0.9325
Epoch 4 step 1061: training loss: 486.655807742015
Epoch 4 step 1062: training ac

Epoch 4 step 1134: training accuarcy: 0.933
Epoch 4 step 1134: training loss: 500.6057543846951
Epoch 4 step 1135: training accuarcy: 0.931
Epoch 4 step 1135: training loss: 476.81830133964263
Epoch 4 step 1136: training accuarcy: 0.9355
Epoch 4 step 1136: training loss: 500.23706419609647
Epoch 4 step 1137: training accuarcy: 0.9420000000000001
Epoch 4 step 1137: training loss: 492.7494687494114
Epoch 4 step 1138: training accuarcy: 0.933
Epoch 4 step 1138: training loss: 472.97427583483204
Epoch 4 step 1139: training accuarcy: 0.9385
Epoch 4 step 1139: training loss: 484.9685377202417
Epoch 4 step 1140: training accuarcy: 0.9410000000000001
Epoch 4 step 1140: training loss: 483.04747723974407
Epoch 4 step 1141: training accuarcy: 0.9355
Epoch 4 step 1141: training loss: 478.6710343315687
Epoch 4 step 1142: training accuarcy: 0.937
Epoch 4 step 1142: training loss: 490.88491428158267
Epoch 4 step 1143: training accuarcy: 0.9410000000000001
Epoch 4 step 1143: training loss: 494.2522838

Epoch 4 step 1215: training accuarcy: 0.9470000000000001
Epoch 4 step 1215: training loss: 469.529197750791
Epoch 4 step 1216: training accuarcy: 0.9385
Epoch 4 step 1216: training loss: 464.0903094449788
Epoch 4 step 1217: training accuarcy: 0.9420000000000001
Epoch 4 step 1217: training loss: 444.28034476058025
Epoch 4 step 1218: training accuarcy: 0.9480000000000001
Epoch 4 step 1218: training loss: 460.46076550649326
Epoch 4 step 1219: training accuarcy: 0.9415
Epoch 4 step 1219: training loss: 450.6857025809599
Epoch 4 step 1220: training accuarcy: 0.9455
Epoch 4 step 1220: training loss: 470.5263183588476
Epoch 4 step 1221: training accuarcy: 0.937
Epoch 4 step 1221: training loss: 476.2984204497551
Epoch 4 step 1222: training accuarcy: 0.9380000000000001
Epoch 4 step 1222: training loss: 475.4403628080806
Epoch 4 step 1223: training accuarcy: 0.9380000000000001
Epoch 4 step 1223: training loss: 457.44684615694365
Epoch 4 step 1224: training accuarcy: 0.9495
Epoch 4 step 1224: tr

Epoch 4 step 1294: training loss: 437.1273217584302
Epoch 4 step 1295: training accuarcy: 0.9460000000000001
Epoch 4 step 1295: training loss: 444.1141464316622
Epoch 4 step 1296: training accuarcy: 0.9385
Epoch 4 step 1296: training loss: 437.65787754953857
Epoch 4 step 1297: training accuarcy: 0.9460000000000001
Epoch 4 step 1297: training loss: 460.94384804461305
Epoch 4 step 1298: training accuarcy: 0.936
Epoch 4 step 1298: training loss: 440.6641427777167
Epoch 4 step 1299: training accuarcy: 0.9440000000000001
Epoch 4 step 1299: training loss: 432.6464711463914
Epoch 4 step 1300: training accuarcy: 0.9440000000000001
Epoch 4 step 1300: training loss: 443.52887251346385
Epoch 4 step 1301: training accuarcy: 0.9435
Epoch 4 step 1301: training loss: 435.30653725169236
Epoch 4 step 1302: training accuarcy: 0.9505
Epoch 4 step 1302: training loss: 427.79114290583607
Epoch 4 step 1303: training accuarcy: 0.9465
Epoch 4 step 1303: training loss: 429.83220819100416
Epoch 4 step 1304: tra

 62%|███████████████████████████████████████████████████████████████████████████████████████████████                                                         | 5/8 [11:08<06:40, 133.64s/it]

Epoch: 5
Epoch 5 step 1315: training loss: 382.69158540254244
Epoch 5 step 1316: training accuarcy: 0.9635
Epoch 5 step 1316: training loss: 378.9544995619815
Epoch 5 step 1317: training accuarcy: 0.9675
Epoch 5 step 1317: training loss: 363.4918250079975
Epoch 5 step 1318: training accuarcy: 0.9655
Epoch 5 step 1318: training loss: 377.13418737692473
Epoch 5 step 1319: training accuarcy: 0.9645
Epoch 5 step 1319: training loss: 369.4395039650103
Epoch 5 step 1320: training accuarcy: 0.968
Epoch 5 step 1320: training loss: 368.33549376382643
Epoch 5 step 1321: training accuarcy: 0.967
Epoch 5 step 1321: training loss: 376.05285522612644
Epoch 5 step 1322: training accuarcy: 0.9580000000000001
Epoch 5 step 1322: training loss: 383.3455247175375
Epoch 5 step 1323: training accuarcy: 0.9560000000000001
Epoch 5 step 1323: training loss: 394.03600890055907
Epoch 5 step 1324: training accuarcy: 0.96
Epoch 5 step 1324: training loss: 368.10827830408823
Epoch 5 step 1325: training accuarcy: 0.

Epoch 5 step 1398: training loss: 370.61162255082525
Epoch 5 step 1399: training accuarcy: 0.965
Epoch 5 step 1399: training loss: 352.5954774327864
Epoch 5 step 1400: training accuarcy: 0.9655
Epoch 5 step 1400: training loss: 372.20125989883155
Epoch 5 step 1401: training accuarcy: 0.9605
Epoch 5 step 1401: training loss: 367.08795141030123
Epoch 5 step 1402: training accuarcy: 0.96
Epoch 5 step 1402: training loss: 354.8000940037484
Epoch 5 step 1403: training accuarcy: 0.968
Epoch 5 step 1403: training loss: 367.3864738985663
Epoch 5 step 1404: training accuarcy: 0.9585
Epoch 5 step 1404: training loss: 358.9317877069067
Epoch 5 step 1405: training accuarcy: 0.967
Epoch 5 step 1405: training loss: 366.36194844782074
Epoch 5 step 1406: training accuarcy: 0.9605
Epoch 5 step 1406: training loss: 371.16212999958697
Epoch 5 step 1407: training accuarcy: 0.9575
Epoch 5 step 1407: training loss: 349.032322132999
Epoch 5 step 1408: training accuarcy: 0.965
Epoch 5 step 1408: training loss

Epoch 5 step 1482: training loss: 350.86489094437417
Epoch 5 step 1483: training accuarcy: 0.9585
Epoch 5 step 1483: training loss: 347.93031699317316
Epoch 5 step 1484: training accuarcy: 0.969
Epoch 5 step 1484: training loss: 342.0816574469401
Epoch 5 step 1485: training accuarcy: 0.9645
Epoch 5 step 1485: training loss: 350.72908298711667
Epoch 5 step 1486: training accuarcy: 0.9595
Epoch 5 step 1486: training loss: 360.46153950730786
Epoch 5 step 1487: training accuarcy: 0.9635
Epoch 5 step 1487: training loss: 341.6690652410034
Epoch 5 step 1488: training accuarcy: 0.964
Epoch 5 step 1488: training loss: 324.1891203548594
Epoch 5 step 1489: training accuarcy: 0.965
Epoch 5 step 1489: training loss: 355.0459323375949
Epoch 5 step 1490: training accuarcy: 0.9625
Epoch 5 step 1490: training loss: 344.19741632898473
Epoch 5 step 1491: training accuarcy: 0.965
Epoch 5 step 1491: training loss: 335.5746837353208
Epoch 5 step 1492: training accuarcy: 0.9625
Epoch 5 step 1492: training l

Epoch 5 step 1566: training loss: 319.46091384713014
Epoch 5 step 1567: training accuarcy: 0.9705
Epoch 5 step 1567: training loss: 336.4508112412377
Epoch 5 step 1568: training accuarcy: 0.9665
Epoch 5 step 1568: training loss: 323.92785401255634
Epoch 5 step 1569: training accuarcy: 0.9715
Epoch 5 step 1569: training loss: 339.8402999860718
Epoch 5 step 1570: training accuarcy: 0.963
Epoch 5 step 1570: training loss: 341.8542358580971
Epoch 5 step 1571: training accuarcy: 0.961
Epoch 5 step 1571: training loss: 339.8648487810889
Epoch 5 step 1572: training accuarcy: 0.9625
Epoch 5 step 1572: training loss: 331.38994079883923
Epoch 5 step 1573: training accuarcy: 0.969
Epoch 5 step 1573: training loss: 320.7305441607089
Epoch 5 step 1574: training accuarcy: 0.969
Epoch 5 step 1574: training loss: 324.4055315365067
Epoch 5 step 1575: training accuarcy: 0.969
Epoch 5 step 1575: training loss: 311.67969606415227
Epoch 5 step 1576: training accuarcy: 0.9695
Epoch 5 step 1576: training los

 75%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████                                      | 6/8 [13:22<04:27, 133.79s/it]

Epoch: 6
Epoch 6 step 1578: training loss: 270.0170837335595
Epoch 6 step 1579: training accuarcy: 0.9805
Epoch 6 step 1579: training loss: 281.39022914885066
Epoch 6 step 1580: training accuarcy: 0.9825
Epoch 6 step 1580: training loss: 287.0902149166919
Epoch 6 step 1581: training accuarcy: 0.9765
Epoch 6 step 1581: training loss: 276.83760292112345
Epoch 6 step 1582: training accuarcy: 0.979
Epoch 6 step 1582: training loss: 282.16422260623045
Epoch 6 step 1583: training accuarcy: 0.977
Epoch 6 step 1583: training loss: 294.42673460693646
Epoch 6 step 1584: training accuarcy: 0.976
Epoch 6 step 1584: training loss: 268.28140452610694
Epoch 6 step 1585: training accuarcy: 0.984
Epoch 6 step 1585: training loss: 280.8093418639373
Epoch 6 step 1586: training accuarcy: 0.9765
Epoch 6 step 1586: training loss: 280.86573416291475
Epoch 6 step 1587: training accuarcy: 0.9765
Epoch 6 step 1587: training loss: 280.5462040540863
Epoch 6 step 1588: training accuarcy: 0.9765
Epoch 6 step 1588: 

Epoch 6 step 1663: training accuarcy: 0.982
Epoch 6 step 1663: training loss: 272.74372363398527
Epoch 6 step 1664: training accuarcy: 0.974
Epoch 6 step 1664: training loss: 260.3027794584119
Epoch 6 step 1665: training accuarcy: 0.982
Epoch 6 step 1665: training loss: 260.45961682298406
Epoch 6 step 1666: training accuarcy: 0.9825
Epoch 6 step 1666: training loss: 264.2485271111472
Epoch 6 step 1667: training accuarcy: 0.976
Epoch 6 step 1667: training loss: 250.7108814927727
Epoch 6 step 1668: training accuarcy: 0.985
Epoch 6 step 1668: training loss: 277.4262102871315
Epoch 6 step 1669: training accuarcy: 0.9725
Epoch 6 step 1669: training loss: 277.7818438752277
Epoch 6 step 1670: training accuarcy: 0.977
Epoch 6 step 1670: training loss: 247.6617572399866
Epoch 6 step 1671: training accuarcy: 0.9835
Epoch 6 step 1671: training loss: 263.3573428730975
Epoch 6 step 1672: training accuarcy: 0.9785
Epoch 6 step 1672: training loss: 290.92088549129676
Epoch 6 step 1673: training accua

Epoch 6 step 1747: training loss: 239.25604234662714
Epoch 6 step 1748: training accuarcy: 0.984
Epoch 6 step 1748: training loss: 250.5090920832568
Epoch 6 step 1749: training accuarcy: 0.9795
Epoch 6 step 1749: training loss: 270.89029412113
Epoch 6 step 1750: training accuarcy: 0.978
Epoch 6 step 1750: training loss: 252.03609499469488
Epoch 6 step 1751: training accuarcy: 0.983
Epoch 6 step 1751: training loss: 260.92424193545634
Epoch 6 step 1752: training accuarcy: 0.978
Epoch 6 step 1752: training loss: 250.57131871772398
Epoch 6 step 1753: training accuarcy: 0.979
Epoch 6 step 1753: training loss: 260.8822594586854
Epoch 6 step 1754: training accuarcy: 0.981
Epoch 6 step 1754: training loss: 277.7113655219941
Epoch 6 step 1755: training accuarcy: 0.97
Epoch 6 step 1755: training loss: 244.93968000144952
Epoch 6 step 1756: training accuarcy: 0.985
Epoch 6 step 1756: training loss: 244.50587167179933
Epoch 6 step 1757: training accuarcy: 0.9825
Epoch 6 step 1757: training loss: 2

Epoch 6 step 1831: training loss: 256.0917421326599
Epoch 6 step 1832: training accuarcy: 0.977
Epoch 6 step 1832: training loss: 242.16521695225123
Epoch 6 step 1833: training accuarcy: 0.98
Epoch 6 step 1833: training loss: 245.67361457117
Epoch 6 step 1834: training accuarcy: 0.9795
Epoch 6 step 1834: training loss: 240.71289769909637
Epoch 6 step 1835: training accuarcy: 0.979
Epoch 6 step 1835: training loss: 245.31565645645395
Epoch 6 step 1836: training accuarcy: 0.979
Epoch 6 step 1836: training loss: 256.6094074278575
Epoch 6 step 1837: training accuarcy: 0.979
Epoch 6 step 1837: training loss: 233.37632116617718
Epoch 6 step 1838: training accuarcy: 0.986
Epoch 6 step 1838: training loss: 234.0348094966601
Epoch 6 step 1839: training accuarcy: 0.9845
Epoch 6 step 1839: training loss: 244.20991161044654
Epoch 6 step 1840: training accuarcy: 0.981
Epoch 6 step 1840: training loss: 100.71726507129797
Epoch 6 step 1841: training accuarcy: 0.985897435897436
Epoch 6: train loss 262

 88%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████                   | 7/8 [15:33<02:12, 132.84s/it]

Epoch: 7
Epoch 7 step 1841: training loss: 207.70664988120637
Epoch 7 step 1842: training accuarcy: 0.987
Epoch 7 step 1842: training loss: 201.9350882288199
Epoch 7 step 1843: training accuarcy: 0.9905
Epoch 7 step 1843: training loss: 200.32081485993274
Epoch 7 step 1844: training accuarcy: 0.9895
Epoch 7 step 1844: training loss: 211.9872521946416
Epoch 7 step 1845: training accuarcy: 0.9875
Epoch 7 step 1845: training loss: 211.92228309816176
Epoch 7 step 1846: training accuarcy: 0.9875
Epoch 7 step 1846: training loss: 201.46405688122888
Epoch 7 step 1847: training accuarcy: 0.99
Epoch 7 step 1847: training loss: 209.99295785035767
Epoch 7 step 1848: training accuarcy: 0.99
Epoch 7 step 1848: training loss: 212.418770928042
Epoch 7 step 1849: training accuarcy: 0.9915
Epoch 7 step 1849: training loss: 198.96235930242898
Epoch 7 step 1850: training accuarcy: 0.989
Epoch 7 step 1850: training loss: 210.20298751367457
Epoch 7 step 1851: training accuarcy: 0.9865
Epoch 7 step 1851: tr

Epoch 7 step 1925: training loss: 194.71575382818938
Epoch 7 step 1926: training accuarcy: 0.989
Epoch 7 step 1926: training loss: 202.36474075876674
Epoch 7 step 1927: training accuarcy: 0.9845
Epoch 7 step 1927: training loss: 195.91515723538737
Epoch 7 step 1928: training accuarcy: 0.9875
Epoch 7 step 1928: training loss: 191.8330441657476
Epoch 7 step 1929: training accuarcy: 0.9895
Epoch 7 step 1929: training loss: 201.41853329777916
Epoch 7 step 1930: training accuarcy: 0.987
Epoch 7 step 1930: training loss: 209.73906444579933
Epoch 7 step 1931: training accuarcy: 0.986
Epoch 7 step 1931: training loss: 200.4897903799368
Epoch 7 step 1932: training accuarcy: 0.99
Epoch 7 step 1932: training loss: 201.1701422003739
Epoch 7 step 1933: training accuarcy: 0.99
Epoch 7 step 1933: training loss: 191.6006720319378
Epoch 7 step 1934: training accuarcy: 0.99
Epoch 7 step 1934: training loss: 204.16885072806537
Epoch 7 step 1935: training accuarcy: 0.988
Epoch 7 step 1935: training loss: 

Epoch 7 step 2009: training loss: 189.85619358721257
Epoch 7 step 2010: training accuarcy: 0.989
Epoch 7 step 2010: training loss: 190.03284790052487
Epoch 7 step 2011: training accuarcy: 0.9855
Epoch 7 step 2011: training loss: 188.68278290596368
Epoch 7 step 2012: training accuarcy: 0.985
Epoch 7 step 2012: training loss: 192.35876979405282
Epoch 7 step 2013: training accuarcy: 0.99
Epoch 7 step 2013: training loss: 194.51239370097494
Epoch 7 step 2014: training accuarcy: 0.989
Epoch 7 step 2014: training loss: 198.96255752038684
Epoch 7 step 2015: training accuarcy: 0.991
Epoch 7 step 2015: training loss: 195.3538354390031
Epoch 7 step 2016: training accuarcy: 0.9875
Epoch 7 step 2016: training loss: 205.26478259370407
Epoch 7 step 2017: training accuarcy: 0.9865
Epoch 7 step 2017: training loss: 198.26959940048633
Epoch 7 step 2018: training accuarcy: 0.986
Epoch 7 step 2018: training loss: 199.2528931640351
Epoch 7 step 2019: training accuarcy: 0.9865
Epoch 7 step 2019: training l

Epoch 7 step 2093: training loss: 179.8702585323107
Epoch 7 step 2094: training accuarcy: 0.9905
Epoch 7 step 2094: training loss: 174.41150387827287
Epoch 7 step 2095: training accuarcy: 0.9925
Epoch 7 step 2095: training loss: 198.10309414644811
Epoch 7 step 2096: training accuarcy: 0.985
Epoch 7 step 2096: training loss: 180.61786192221035
Epoch 7 step 2097: training accuarcy: 0.99
Epoch 7 step 2097: training loss: 189.88928679214618
Epoch 7 step 2098: training accuarcy: 0.99
Epoch 7 step 2098: training loss: 189.52055193954126
Epoch 7 step 2099: training accuarcy: 0.985
Epoch 7 step 2099: training loss: 185.10759346590456
Epoch 7 step 2100: training accuarcy: 0.9885
Epoch 7 step 2100: training loss: 186.21774012971227
Epoch 7 step 2101: training accuarcy: 0.987
Epoch 7 step 2101: training loss: 188.7504374050971
Epoch 7 step 2102: training accuarcy: 0.987
Epoch 7 step 2102: training loss: 177.66305252278585
Epoch 7 step 2103: training accuarcy: 0.9925
Epoch 7 step 2103: training lo

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 8/8 [17:43<00:00, 132.13s/it]


In [44]:
# Free the GPU memory used by the model trained above before moving on.
# Unbinding the name drops this notebook's reference to the model; the
# try/except keeps the cell idempotent, so re-running it after the name has
# already been deleted does not abort with NameError before the cache flush.
# NOTE(review): the memory only becomes reclaimable if nothing else (e.g. a
# learner or optimizer object created earlier) still references the model --
# confirm and delete those objects too if needed.
try:
    del trans_model
except NameError:
    pass  # already released by a previous run of this cell

# Return unoccupied cached blocks held by PyTorch's CUDA caching allocator
# to the driver so the memory is available to whatever is trained next.
T.cuda.empty_cache()