In [1]:
%matplotlib inline

In [2]:
cd ..

C:\Projects\python\recommender


In [3]:
from functools import partial
from pathlib import Path
from datetime import datetime

import numpy as np
import pandas as pd
import torch as T
import torch.optim as optim

from utils import get_log_dir

In [4]:
from datasets import SeqKaggle
from models import FMLearner, TorchFM, TorchHrmFM, TorchPrmeFM, TorchTransFM
from models.fm_learner import simple_loss, trans_loss

In [5]:
DEVICE = T.cuda.current_device()
BATCH = 2000
SHUFFLE = True
WORKERS = 0
NEG_SAMPLE = 5
item_path = Path("./inputs/kaggle/item.csv")

In [6]:
db = SeqKaggle(data_path=item_path, user_min=4)
db

Raw dataframe shape (476244, 8)
After drop nan shape: (429988, 8)
Original comptition size: 292
Original competitor size: 140065
Filtered competiter size: 27449
Filtered dataframe shape: (284806, 8)


<datasets.torch_kaggle.SeqKaggle at 0x22d44240470>

In [7]:
db.config_db(batch_size=BATCH,
             shuffle=SHUFFLE,
             num_workers=WORKERS,
             device=DEVICE,
             neg_sample=NEG_SAMPLE)

In [8]:
feat_dim = db.feat_dim
NUM_DIM = 124
INIT_MEAN = 0.1

## Create Criterion

In [9]:
# regst setting
LINEAR_REG = 1
EMB_REG = 1
TRANS_REG = 1

In [10]:
simple_loss_callback = partial(simple_loss, LINEAR_REG, EMB_REG)
simple_loss_callback

functools.partial(<function simple_loss at 0x0000022D441B80D0>, 1, 1)

In [11]:
trans_loss_callback = partial(trans_loss, LINEAR_REG, EMB_REG, TRANS_REG)
trans_loss_callback

functools.partial(<function trans_loss at 0x0000022D442472F0>, 1, 1, 1)

## Train Model

### Hyper-parameter

In [12]:
feat_dim = db.feat_dim
NUM_DIM = 124
INIT_MEAN = 0.1

### Train FM Model

In [13]:
LEARNING_RATE = 0.001
DECAY_FREQ = 1000
DECAY_GAMMA = 1

In [14]:
fm_model = TorchFM(feature_dim=feat_dim, num_dim=NUM_DIM, init_mean=INIT_MEAN)
fm_model

TorchFM()

In [15]:
adam_opt = optim.Adam(fm_model.parameters(), lr=LEARNING_RATE)
schedular = optim.lr_scheduler.StepLR(adam_opt, step_size=DECAY_FREQ, gamma=DECAY_GAMMA)

In [16]:
fm_learner = FMLearner(fm_model, adam_opt, schedular, db)
fm_learner

<models.fm_learner.FMLearner at 0x22d4bf31cf8>

In [17]:
fm_learner.compile(train_col='base',
                   valid_col='seq',
                   test_col='seq',
                   loss_callback=simple_loss_callback)

In [18]:
fm_learner.fit(epoch=5,
               log_dir=get_log_dir(ds_type='seq_kaggle', model_type='fm'))

  0%|                                                                                                                                                                                                    | 0/5 [00:00<?, ?it/s]

Epoch: 0
Epoch 0 step 0: training loss: 12010.134403521854
Epoch 0 step 1: training accuarcy: 0.859
Epoch 0 step 1: training loss: 11661.927133434572
Epoch 0 step 2: training accuarcy: 0.866
Epoch 0 step 2: training loss: 11303.551708048053
Epoch 0 step 3: training accuarcy: 0.8805000000000001
Epoch 0 step 3: training loss: 10966.791070946549
Epoch 0 step 4: training accuarcy: 0.8905000000000001
Epoch 0 step 4: training loss: 10616.640202072596
Epoch 0 step 5: training accuarcy: 0.91
Epoch 0 step 5: training loss: 10284.627760775282
Epoch 0 step 6: training accuarcy: 0.918
Epoch 0 step 6: training loss: 9970.833579659313
Epoch 0 step 7: training accuarcy: 0.928
Epoch 0 step 7: training loss: 9665.134099415494
Epoch 0 step 8: training accuarcy: 0.9325
Epoch 0 step 8: training loss: 9366.689589813801
Epoch 0 step 9: training accuarcy: 0.9375
Epoch 0 step 9: training loss: 9045.843292819876
Epoch 0 step 10: training accuarcy: 0.9535
Epoch 0 step 10: training loss: 8779.919747385251
Epoch 

Epoch 0 step 88: training accuarcy: 0.9965
Epoch 0 step 88: training loss: 501.70881084428567
Epoch 0 step 89: training accuarcy: 0.997
Epoch 0 step 89: training loss: 482.4016022994004
Epoch 0 step 90: training accuarcy: 0.9975
Epoch 0 step 90: training loss: 461.1580390331703
Epoch 0 step 91: training accuarcy: 0.9975
Epoch 0 step 91: training loss: 442.7490853990652
Epoch 0 step 92: training accuarcy: 0.9965
Epoch 0 step 92: training loss: 429.09424278617627
Epoch 0 step 93: training accuarcy: 0.996
Epoch 0 step 93: training loss: 414.5231817965218
Epoch 0 step 94: training accuarcy: 0.996
Epoch 0 step 94: training loss: 396.5293327894741
Epoch 0 step 95: training accuarcy: 0.9975
Epoch 0 step 95: training loss: 383.52456499856015
Epoch 0 step 96: training accuarcy: 0.996
Epoch 0 step 96: training loss: 367.64709805609255
Epoch 0 step 97: training accuarcy: 0.997
Epoch 0 step 97: training loss: 353.09346535688996
Epoch 0 step 98: training accuarcy: 0.9975
Epoch 0 step 98: training l

 20%|█████████████████████████████████████▍                                                                                                                                                     | 1/5 [02:08<08:34, 128.67s/it]

Epoch: 1
Epoch 1 step 115: training loss: 176.69263084855842
Epoch 1 step 116: training accuarcy: 0.997
Epoch 1 step 116: training loss: 170.66428352982518
Epoch 1 step 117: training accuarcy: 0.999
Epoch 1 step 117: training loss: 167.43374756749955
Epoch 1 step 118: training accuarcy: 0.999
Epoch 1 step 118: training loss: 160.6268022069496
Epoch 1 step 119: training accuarcy: 0.9995
Epoch 1 step 119: training loss: 154.2714364209173
Epoch 1 step 120: training accuarcy: 0.9975
Epoch 1 step 120: training loss: 148.36978118952572
Epoch 1 step 121: training accuarcy: 0.999
Epoch 1 step 121: training loss: 146.79330004219398
Epoch 1 step 122: training accuarcy: 0.9995
Epoch 1 step 122: training loss: 140.71531403831943
Epoch 1 step 123: training accuarcy: 0.9995
Epoch 1 step 123: training loss: 136.05693393296139
Epoch 1 step 124: training accuarcy: 0.999
Epoch 1 step 124: training loss: 131.3328637321447
Epoch 1 step 125: training accuarcy: 0.9975
Epoch 1 step 125: training loss: 125.47

Epoch 1 step 202: training accuarcy: 0.999
Epoch 1 step 202: training loss: 35.02132886468987
Epoch 1 step 203: training accuarcy: 0.9995
Epoch 1 step 203: training loss: 35.810385039010065
Epoch 1 step 204: training accuarcy: 0.999
Epoch 1 step 204: training loss: 33.3990637282775
Epoch 1 step 205: training accuarcy: 1.0
Epoch 1 step 205: training loss: 35.41912288900711
Epoch 1 step 206: training accuarcy: 1.0
Epoch 1 step 206: training loss: 37.38118670866964
Epoch 1 step 207: training accuarcy: 0.9995
Epoch 1 step 207: training loss: 35.1885095429769
Epoch 1 step 208: training accuarcy: 0.998
Epoch 1 step 208: training loss: 33.60448715392165
Epoch 1 step 209: training accuarcy: 0.9995
Epoch 1 step 209: training loss: 35.92314502859578
Epoch 1 step 210: training accuarcy: 0.9985
Epoch 1 step 210: training loss: 34.29951167622933
Epoch 1 step 211: training accuarcy: 0.9995
Epoch 1 step 211: training loss: 33.94581729217731
Epoch 1 step 212: training accuarcy: 1.0
Epoch 1 step 212: t

 40%|██████████████████████████████████████████████████████████████████████████▊                                                                                                                | 2/5 [04:16<06:24, 128.28s/it]

Epoch: 2
Epoch 2 step 230: training loss: 34.64273421655233
Epoch 2 step 231: training accuarcy: 0.999
Epoch 2 step 231: training loss: 27.906600745953654
Epoch 2 step 232: training accuarcy: 1.0
Epoch 2 step 232: training loss: 29.376733057995928
Epoch 2 step 233: training accuarcy: 0.9995
Epoch 2 step 233: training loss: 31.39619481355246
Epoch 2 step 234: training accuarcy: 0.9995
Epoch 2 step 234: training loss: 29.850975281649255
Epoch 2 step 235: training accuarcy: 1.0
Epoch 2 step 235: training loss: 31.244792490732884
Epoch 2 step 236: training accuarcy: 0.9995
Epoch 2 step 236: training loss: 27.512945294778966
Epoch 2 step 237: training accuarcy: 1.0
Epoch 2 step 237: training loss: 28.575287338534213
Epoch 2 step 238: training accuarcy: 1.0
Epoch 2 step 238: training loss: 28.266758905663856
Epoch 2 step 239: training accuarcy: 1.0
Epoch 2 step 239: training loss: 28.286086576493986
Epoch 2 step 240: training accuarcy: 1.0
Epoch 2 step 240: training loss: 31.559995370702445


Epoch 2 step 317: training accuarcy: 0.9995
Epoch 2 step 317: training loss: 24.13302112702363
Epoch 2 step 318: training accuarcy: 1.0
Epoch 2 step 318: training loss: 26.589983827962946
Epoch 2 step 319: training accuarcy: 1.0
Epoch 2 step 319: training loss: 25.104082351093638
Epoch 2 step 320: training accuarcy: 1.0
Epoch 2 step 320: training loss: 25.33478973430119
Epoch 2 step 321: training accuarcy: 0.9995
Epoch 2 step 321: training loss: 26.46413091195777
Epoch 2 step 322: training accuarcy: 1.0
Epoch 2 step 322: training loss: 23.448958234919324
Epoch 2 step 323: training accuarcy: 1.0
Epoch 2 step 323: training loss: 26.307982561241193
Epoch 2 step 324: training accuarcy: 1.0
Epoch 2 step 324: training loss: 26.739612997761178
Epoch 2 step 325: training accuarcy: 0.9995
Epoch 2 step 325: training loss: 27.8618968777188
Epoch 2 step 326: training accuarcy: 0.999
Epoch 2 step 326: training loss: 25.03776121314371
Epoch 2 step 327: training accuarcy: 1.0
Epoch 2 step 327: traini

 60%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏                                                                          | 3/5 [06:19<04:13, 126.89s/it]

Epoch: 3
Epoch 3 step 345: training loss: 23.850032278194835
Epoch 3 step 346: training accuarcy: 1.0
Epoch 3 step 346: training loss: 26.292279634704826
Epoch 3 step 347: training accuarcy: 1.0
Epoch 3 step 347: training loss: 24.19902681050049
Epoch 3 step 348: training accuarcy: 1.0
Epoch 3 step 348: training loss: 22.310082592037375
Epoch 3 step 349: training accuarcy: 0.9995
Epoch 3 step 349: training loss: 25.969487148152847
Epoch 3 step 350: training accuarcy: 1.0
Epoch 3 step 350: training loss: 24.9263669552314
Epoch 3 step 351: training accuarcy: 1.0
Epoch 3 step 351: training loss: 23.732361874251914
Epoch 3 step 352: training accuarcy: 0.9985
Epoch 3 step 352: training loss: 21.448959048812856
Epoch 3 step 353: training accuarcy: 1.0
Epoch 3 step 353: training loss: 24.293695052033485
Epoch 3 step 354: training accuarcy: 1.0
Epoch 3 step 354: training loss: 25.19805816588321
Epoch 3 step 355: training accuarcy: 1.0
Epoch 3 step 355: training loss: 23.974540836058047
Epoch 3

Epoch 3 step 432: training loss: 23.77069883606715
Epoch 3 step 433: training accuarcy: 1.0
Epoch 3 step 433: training loss: 21.327415651406184
Epoch 3 step 434: training accuarcy: 1.0
Epoch 3 step 434: training loss: 23.083068724234117
Epoch 3 step 435: training accuarcy: 1.0
Epoch 3 step 435: training loss: 24.894827925799213
Epoch 3 step 436: training accuarcy: 1.0
Epoch 3 step 436: training loss: 23.09736427663123
Epoch 3 step 437: training accuarcy: 1.0
Epoch 3 step 437: training loss: 23.2771396302409
Epoch 3 step 438: training accuarcy: 1.0
Epoch 3 step 438: training loss: 22.99546102661319
Epoch 3 step 439: training accuarcy: 1.0
Epoch 3 step 439: training loss: 23.442870234533196
Epoch 3 step 440: training accuarcy: 0.9995
Epoch 3 step 440: training loss: 22.119943089354003
Epoch 3 step 441: training accuarcy: 0.9995
Epoch 3 step 441: training loss: 20.72685337586546
Epoch 3 step 442: training accuarcy: 1.0
Epoch 3 step 442: training loss: 21.166515215018958
Epoch 3 step 443: 

 80%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌                                     | 4/5 [08:24<02:06, 126.39s/it]

Epoch: 4
Epoch 4 step 460: training loss: 22.093097314793205
Epoch 4 step 461: training accuarcy: 1.0
Epoch 4 step 461: training loss: 20.479855293469296
Epoch 4 step 462: training accuarcy: 1.0
Epoch 4 step 462: training loss: 23.077231024032585
Epoch 4 step 463: training accuarcy: 1.0
Epoch 4 step 463: training loss: 24.752767447622205
Epoch 4 step 464: training accuarcy: 1.0
Epoch 4 step 464: training loss: 21.38133707775594
Epoch 4 step 465: training accuarcy: 1.0
Epoch 4 step 465: training loss: 23.84831249343437
Epoch 4 step 466: training accuarcy: 1.0
Epoch 4 step 466: training loss: 21.59820311113603
Epoch 4 step 467: training accuarcy: 1.0
Epoch 4 step 467: training loss: 22.431626633763504
Epoch 4 step 468: training accuarcy: 0.9995
Epoch 4 step 468: training loss: 23.037834132279635
Epoch 4 step 469: training accuarcy: 1.0
Epoch 4 step 469: training loss: 21.151172870192667
Epoch 4 step 470: training accuarcy: 1.0
Epoch 4 step 470: training loss: 20.054940553169185
Epoch 4 s

Epoch 4 step 548: training loss: 22.577885405822222
Epoch 4 step 549: training accuarcy: 0.9995
Epoch 4 step 549: training loss: 21.948807343194687
Epoch 4 step 550: training accuarcy: 1.0
Epoch 4 step 550: training loss: 23.066434788752737
Epoch 4 step 551: training accuarcy: 1.0
Epoch 4 step 551: training loss: 25.002768529695825
Epoch 4 step 552: training accuarcy: 1.0
Epoch 4 step 552: training loss: 22.506653019854877
Epoch 4 step 553: training accuarcy: 1.0
Epoch 4 step 553: training loss: 20.209354508288904
Epoch 4 step 554: training accuarcy: 1.0
Epoch 4 step 554: training loss: 21.08350445549492
Epoch 4 step 555: training accuarcy: 1.0
Epoch 4 step 555: training loss: 23.354571106754037
Epoch 4 step 556: training accuarcy: 1.0
Epoch 4 step 556: training loss: 21.123411509775774
Epoch 4 step 557: training accuarcy: 1.0
Epoch 4 step 557: training loss: 23.16923795246076
Epoch 4 step 558: training accuarcy: 1.0
Epoch 4 step 558: training loss: 20.947775097136358
Epoch 4 step 559:

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5/5 [10:29<00:00, 125.77s/it]


In [19]:
del fm_model
T.cuda.empty_cache()

### Train HRM FM Model

In [25]:
hrm_model = TorchHrmFM(feature_dim=feat_dim, num_dim=NUM_DIM, init_mean=INIT_MEAN)
hrm_model

TorchHrmFM()

In [26]:
adam_opt = optim.Adam(hrm_model.parameters(), lr=LEARNING_RATE)
schedular = optim.lr_scheduler.StepLR(adam_opt,
                                      step_size=DECAY_FREQ,
                                      gamma=DECAY_GAMMA)

In [27]:
hrm_learner = FMLearner(hrm_model, adam_opt, schedular, db)
hrm_learner

<models.fm_learner.FMLearner at 0x22d4c19fb00>

In [28]:
hrm_learner.compile(train_col='base',
                   valid_col='seq',
                   test_col='seq',
                   loss_callback=simple_loss_callback)

In [29]:
hrm_learner.fit(epoch=5,
                log_dir=get_log_dir('seq_kaggle', 'hrm'))

  0%|                                                                                                                                                                                     | 0/5 [00:00<?, ?it/s]

Epoch: 0
Epoch 0 step 0: training loss: 49645.59129197651
Epoch 0 step 1: training accuarcy: 0.0845
Epoch 0 step 1: training loss: 49441.97592334109
Epoch 0 step 2: training accuarcy: 0.0905
Epoch 0 step 2: training loss: 48603.50561359062
Epoch 0 step 3: training accuarcy: 0.116
Epoch 0 step 3: training loss: 48573.254697295575
Epoch 0 step 4: training accuarcy: 0.1255
Epoch 0 step 4: training loss: 48383.31143669318
Epoch 0 step 5: training accuarcy: 0.1225
Epoch 0 step 5: training loss: 47993.91617766794
Epoch 0 step 6: training accuarcy: 0.132
Epoch 0 step 6: training loss: 47035.30560572991
Epoch 0 step 7: training accuarcy: 0.158
Epoch 0 step 7: training loss: 47660.835588680304
Epoch 0 step 8: training accuarcy: 0.132
Epoch 0 step 8: training loss: 46773.58345595264
Epoch 0 step 9: training accuarcy: 0.1465
Epoch 0 step 9: training loss: 46477.09447929335
Epoch 0 step 10: training accuarcy: 0.146
Epoch 0 step 10: training loss: 46114.286716883274
Epoch 0 step 11: training accuar

Epoch 0 step 87: training loss: 37909.70462175318
Epoch 0 step 88: training accuarcy: 0.17500000000000002
Epoch 0 step 88: training loss: 37852.82818312794
Epoch 0 step 89: training accuarcy: 0.179
Epoch 0 step 89: training loss: 37224.46167739234
Epoch 0 step 90: training accuarcy: 0.192
Epoch 0 step 90: training loss: 38128.95363949669
Epoch 0 step 91: training accuarcy: 0.1665
Epoch 0 step 91: training loss: 37222.67187229151
Epoch 0 step 92: training accuarcy: 0.189
Epoch 0 step 92: training loss: 37599.00787620198
Epoch 0 step 93: training accuarcy: 0.179
Epoch 0 step 93: training loss: 37403.69916096702
Epoch 0 step 94: training accuarcy: 0.179
Epoch 0 step 94: training loss: 36778.568216526815
Epoch 0 step 95: training accuarcy: 0.194
Epoch 0 step 95: training loss: 37188.1516340039
Epoch 0 step 96: training accuarcy: 0.17350000000000002
Epoch 0 step 96: training loss: 36940.37171390647
Epoch 0 step 97: training accuarcy: 0.181
Epoch 0 step 97: training loss: 36128.91697511067
E

 20%|██████████████████████████████████▍                                                                                                                                         | 1/5 [02:07<08:28, 127.16s/it]

Epoch: 1
Epoch 1 step 115: training loss: 19112.670434745287
Epoch 1 step 116: training accuarcy: 0.5705
Epoch 1 step 116: training loss: 17932.8671142867
Epoch 1 step 117: training accuarcy: 0.599
Epoch 1 step 117: training loss: 17458.127848569922
Epoch 1 step 118: training accuarcy: 0.609
Epoch 1 step 118: training loss: 17237.972622242534
Epoch 1 step 119: training accuarcy: 0.6175
Epoch 1 step 119: training loss: 16486.513948923544
Epoch 1 step 120: training accuarcy: 0.629
Epoch 1 step 120: training loss: 14601.548438456082
Epoch 1 step 121: training accuarcy: 0.672
Epoch 1 step 121: training loss: 14350.224632817635
Epoch 1 step 122: training accuarcy: 0.672
Epoch 1 step 122: training loss: 13364.166893398597
Epoch 1 step 123: training accuarcy: 0.7005
Epoch 1 step 123: training loss: 12341.548650155108
Epoch 1 step 124: training accuarcy: 0.733
Epoch 1 step 124: training loss: 12330.561203806443
Epoch 1 step 125: training accuarcy: 0.7285
Epoch 1 step 125: training loss: 11098.

Epoch 1 step 200: training accuarcy: 0.8295
Epoch 1 step 200: training loss: 7960.95026979283
Epoch 1 step 201: training accuarcy: 0.8265
Epoch 1 step 201: training loss: 8063.778172846337
Epoch 1 step 202: training accuarcy: 0.8230000000000001
Epoch 1 step 202: training loss: 8081.101560348519
Epoch 1 step 203: training accuarcy: 0.8250000000000001
Epoch 1 step 203: training loss: 7748.800726078015
Epoch 1 step 204: training accuarcy: 0.8275
Epoch 1 step 204: training loss: 8298.405117257184
Epoch 1 step 205: training accuarcy: 0.8170000000000001
Epoch 1 step 205: training loss: 8211.056289435559
Epoch 1 step 206: training accuarcy: 0.8230000000000001
Epoch 1 step 206: training loss: 7934.309078496858
Epoch 1 step 207: training accuarcy: 0.8260000000000001
Epoch 1 step 207: training loss: 7866.3581375851445
Epoch 1 step 208: training accuarcy: 0.8260000000000001
Epoch 1 step 208: training loss: 8206.597306585149
Epoch 1 step 209: training accuarcy: 0.8210000000000001
Epoch 1 step 209:

 40%|████████████████████████████████████████████████████████████████████▊                                                                                                       | 2/5 [04:14<06:21, 127.31s/it]

Epoch: 2
Epoch 2 step 230: training loss: 6814.439582142628
Epoch 2 step 231: training accuarcy: 0.8495
Epoch 2 step 231: training loss: 7482.41756318637
Epoch 2 step 232: training accuarcy: 0.836
Epoch 2 step 232: training loss: 7189.497681711356
Epoch 2 step 233: training accuarcy: 0.841
Epoch 2 step 233: training loss: 7229.8653551924735
Epoch 2 step 234: training accuarcy: 0.8405
Epoch 2 step 234: training loss: 6385.973911652611
Epoch 2 step 235: training accuarcy: 0.8605
Epoch 2 step 235: training loss: 7326.847724157771
Epoch 2 step 236: training accuarcy: 0.841
Epoch 2 step 236: training loss: 6382.824973095403
Epoch 2 step 237: training accuarcy: 0.861
Epoch 2 step 237: training loss: 7705.705493389038
Epoch 2 step 238: training accuarcy: 0.8305
Epoch 2 step 238: training loss: 7100.628861334906
Epoch 2 step 239: training accuarcy: 0.843
Epoch 2 step 239: training loss: 7493.000079311312
Epoch 2 step 240: training accuarcy: 0.835
Epoch 2 step 240: training loss: 7106.020841475

Epoch 2 step 317: training accuarcy: 0.8695
Epoch 2 step 317: training loss: 5967.02242407596
Epoch 2 step 318: training accuarcy: 0.871
Epoch 2 step 318: training loss: 6243.913025267256
Epoch 2 step 319: training accuarcy: 0.862
Epoch 2 step 319: training loss: 6218.2500914701195
Epoch 2 step 320: training accuarcy: 0.8625
Epoch 2 step 320: training loss: 6429.831228045804
Epoch 2 step 321: training accuarcy: 0.859
Epoch 2 step 321: training loss: 6314.188112955802
Epoch 2 step 322: training accuarcy: 0.8615
Epoch 2 step 322: training loss: 6787.6208464796455
Epoch 2 step 323: training accuarcy: 0.851
Epoch 2 step 323: training loss: 5818.168365117391
Epoch 2 step 324: training accuarcy: 0.873
Epoch 2 step 324: training loss: 6312.904349920344
Epoch 2 step 325: training accuarcy: 0.863
Epoch 2 step 325: training loss: 6842.490462996284
Epoch 2 step 326: training accuarcy: 0.8525
Epoch 2 step 326: training loss: 5808.800989360729
Epoch 2 step 327: training accuarcy: 0.872
Epoch 2 step

 60%|███████████████████████████████████████████████████████████████████████████████████████████████████████▏                                                                    | 3/5 [06:22<04:15, 127.51s/it]

Epoch: 3
Epoch 3 step 345: training loss: 6425.701194370461
Epoch 3 step 346: training accuarcy: 0.8595
Epoch 3 step 346: training loss: 5678.533049882788
Epoch 3 step 347: training accuarcy: 0.8735
Epoch 3 step 347: training loss: 5780.225434777829
Epoch 3 step 348: training accuarcy: 0.874
Epoch 3 step 348: training loss: 6127.103119558243
Epoch 3 step 349: training accuarcy: 0.8655
Epoch 3 step 349: training loss: 6085.659280818756
Epoch 3 step 350: training accuarcy: 0.866
Epoch 3 step 350: training loss: 6294.424436924313
Epoch 3 step 351: training accuarcy: 0.863
Epoch 3 step 351: training loss: 6152.632069322913
Epoch 3 step 352: training accuarcy: 0.8655
Epoch 3 step 352: training loss: 5713.541155247742
Epoch 3 step 353: training accuarcy: 0.8715
Epoch 3 step 353: training loss: 5437.327409446771
Epoch 3 step 354: training accuarcy: 0.88
Epoch 3 step 354: training loss: 5886.80757338122
Epoch 3 step 355: training accuarcy: 0.873
Epoch 3 step 355: training loss: 6323.6537078059

Epoch 3 step 429: training loss: 5051.145352480146
Epoch 3 step 430: training accuarcy: 0.889
Epoch 3 step 430: training loss: 5506.512213120027
Epoch 3 step 431: training accuarcy: 0.881
Epoch 3 step 431: training loss: 5211.8335641605
Epoch 3 step 432: training accuarcy: 0.8855000000000001
Epoch 3 step 432: training loss: 4549.963639050891
Epoch 3 step 433: training accuarcy: 0.8995
Epoch 3 step 433: training loss: 5323.1351374213655
Epoch 3 step 434: training accuarcy: 0.884
Epoch 3 step 434: training loss: 4784.107057061808
Epoch 3 step 435: training accuarcy: 0.8935000000000001
Epoch 3 step 435: training loss: 4839.005957636638
Epoch 3 step 436: training accuarcy: 0.893
Epoch 3 step 436: training loss: 4553.01988473731
Epoch 3 step 437: training accuarcy: 0.899
Epoch 3 step 437: training loss: 4704.14353207499
Epoch 3 step 438: training accuarcy: 0.8985
Epoch 3 step 438: training loss: 4666.856245786459
Epoch 3 step 439: training accuarcy: 0.8955000000000001
Epoch 3 step 439: trai

 80%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌                                  | 4/5 [08:30<02:07, 127.59s/it]

Epoch: 4
Epoch 4 step 460: training loss: 4688.834822353939
Epoch 4 step 461: training accuarcy: 0.898
Epoch 4 step 461: training loss: 5244.239263049634
Epoch 4 step 462: training accuarcy: 0.884
Epoch 4 step 462: training loss: 4828.452668445522
Epoch 4 step 463: training accuarcy: 0.8935000000000001
Epoch 4 step 463: training loss: 4647.025996439273
Epoch 4 step 464: training accuarcy: 0.8965
Epoch 4 step 464: training loss: 4539.643932288637
Epoch 4 step 465: training accuarcy: 0.902
Epoch 4 step 465: training loss: 4608.579155882223
Epoch 4 step 466: training accuarcy: 0.8985
Epoch 4 step 466: training loss: 4941.611027045799
Epoch 4 step 467: training accuarcy: 0.8905000000000001
Epoch 4 step 467: training loss: 5038.000316609147
Epoch 4 step 468: training accuarcy: 0.8895000000000001
Epoch 4 step 468: training loss: 5358.167528469999
Epoch 4 step 469: training accuarcy: 0.885
Epoch 4 step 469: training loss: 4692.819088854933
Epoch 4 step 470: training accuarcy: 0.8965
Epoch 4 s

Epoch 4 step 544: training accuarcy: 0.899
Epoch 4 step 544: training loss: 3973.7319045159247
Epoch 4 step 545: training accuarcy: 0.9145
Epoch 4 step 545: training loss: 4138.921321383483
Epoch 4 step 546: training accuarcy: 0.911
Epoch 4 step 546: training loss: 4962.606385571121
Epoch 4 step 547: training accuarcy: 0.889
Epoch 4 step 547: training loss: 4276.903015827068
Epoch 4 step 548: training accuarcy: 0.903
Epoch 4 step 548: training loss: 3960.7259930647315
Epoch 4 step 549: training accuarcy: 0.9145
Epoch 4 step 549: training loss: 4438.016670277888
Epoch 4 step 550: training accuarcy: 0.904
Epoch 4 step 550: training loss: 4536.764581433381
Epoch 4 step 551: training accuarcy: 0.9005
Epoch 4 step 551: training loss: 4685.567209651415
Epoch 4 step 552: training accuarcy: 0.895
Epoch 4 step 552: training loss: 4428.935621674545
Epoch 4 step 553: training accuarcy: 0.9035
Epoch 4 step 553: training loss: 4455.365453548988
Epoch 4 step 554: training accuarcy: 0.904
Epoch 4 ste

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5/5 [10:40<00:00, 128.36s/it]


In [30]:
del hrm_model
T.cuda.empty_cache()

### Train PRME FM Model

In [31]:
prme_model = TorchPrmeFM(feature_dim=feat_dim, num_dim=NUM_DIM, init_mean=INIT_MEAN)
prme_model

TorchPrmeFM()

In [32]:
adam_opt = optim.Adam(prme_model.parameters(), lr=LEARNING_RATE)
schedular = optim.lr_scheduler.StepLR(adam_opt,
                                      step_size=DECAY_FREQ,
                                      gamma=DECAY_GAMMA)

In [33]:
prme_learner = FMLearner(prme_model, adam_opt, schedular, db)
prme_learner

<models.fm_learner.FMLearner at 0x22d4b6dbef0>

In [34]:
prme_learner.compile(train_col='base',
                   valid_col='seq',
                   test_col='seq',
                   loss_callback=simple_loss_callback)

In [35]:
prme_learner.fit(epoch=5, log_dir=get_log_dir('seq_kaggle', 'prme'))

  0%|                                                                                                                                                                                     | 0/5 [00:00<?, ?it/s]

Epoch: 0
Epoch 0 step 0: training loss: 49105.95866130367
Epoch 0 step 1: training accuarcy: 0.1155
Epoch 0 step 1: training loss: 48562.22836281848
Epoch 0 step 2: training accuarcy: 0.1245
Epoch 0 step 2: training loss: 49555.54119773494
Epoch 0 step 3: training accuarcy: 0.122
Epoch 0 step 3: training loss: 48402.564096958245
Epoch 0 step 4: training accuarcy: 0.134
Epoch 0 step 4: training loss: 47869.023513189764
Epoch 0 step 5: training accuarcy: 0.1545
Epoch 0 step 5: training loss: 47543.51289107889
Epoch 0 step 6: training accuarcy: 0.1445
Epoch 0 step 6: training loss: 46684.45122078633
Epoch 0 step 7: training accuarcy: 0.1645
Epoch 0 step 7: training loss: 46891.728631751816
Epoch 0 step 8: training accuarcy: 0.1585
Epoch 0 step 8: training loss: 47072.48337344827
Epoch 0 step 9: training accuarcy: 0.1495
Epoch 0 step 9: training loss: 46571.25805522571
Epoch 0 step 10: training accuarcy: 0.1485
Epoch 0 step 10: training loss: 46467.00533954098
Epoch 0 step 11: training acc

Epoch 0 step 87: training loss: 37325.80313589282
Epoch 0 step 88: training accuarcy: 0.192
Epoch 0 step 88: training loss: 37235.35503940772
Epoch 0 step 89: training accuarcy: 0.1955
Epoch 0 step 89: training loss: 38129.04263006716
Epoch 0 step 90: training accuarcy: 0.177
Epoch 0 step 90: training loss: 37798.64405042477
Epoch 0 step 91: training accuarcy: 0.185
Epoch 0 step 91: training loss: 38210.20609243295
Epoch 0 step 92: training accuarcy: 0.17350000000000002
Epoch 0 step 92: training loss: 37736.80118434935
Epoch 0 step 93: training accuarcy: 0.1825
Epoch 0 step 93: training loss: 38100.26485261828
Epoch 0 step 94: training accuarcy: 0.17250000000000001
Epoch 0 step 94: training loss: 37798.74529855864
Epoch 0 step 95: training accuarcy: 0.1815
Epoch 0 step 95: training loss: 37817.93444476315
Epoch 0 step 96: training accuarcy: 0.178
Epoch 0 step 96: training loss: 37883.45083250277
Epoch 0 step 97: training accuarcy: 0.181
Epoch 0 step 97: training loss: 37876.93467857227

 20%|██████████████████████████████████▍                                                                                                                                         | 1/5 [02:13<08:52, 133.18s/it]

Epoch: 1
Epoch 1 step 115: training loss: 36911.58440936741
Epoch 1 step 116: training accuarcy: 0.185
Epoch 1 step 116: training loss: 35242.31702081956
Epoch 1 step 117: training accuarcy: 0.223
Epoch 1 step 117: training loss: 34335.93000066455
Epoch 1 step 118: training accuarcy: 0.2435
Epoch 1 step 118: training loss: 33432.220576204694
Epoch 1 step 119: training accuarcy: 0.2685
Epoch 1 step 119: training loss: 34049.51730353616
Epoch 1 step 120: training accuarcy: 0.2505
Epoch 1 step 120: training loss: 32559.39086909172
Epoch 1 step 121: training accuarcy: 0.28250000000000003
Epoch 1 step 121: training loss: 32010.276599136178
Epoch 1 step 122: training accuarcy: 0.291
Epoch 1 step 122: training loss: 31839.392169206963
Epoch 1 step 123: training accuarcy: 0.293
Epoch 1 step 123: training loss: 30484.367595429976
Epoch 1 step 124: training accuarcy: 0.325
Epoch 1 step 124: training loss: 29151.06633362269
Epoch 1 step 125: training accuarcy: 0.352
Epoch 1 step 125: training los

Epoch 1 step 200: training loss: 13149.98364388815
Epoch 1 step 201: training accuarcy: 0.714
Epoch 1 step 201: training loss: 12313.691808781638
Epoch 1 step 202: training accuarcy: 0.7315
Epoch 1 step 202: training loss: 12719.612482340232
Epoch 1 step 203: training accuarcy: 0.722
Epoch 1 step 203: training loss: 12060.000494572982
Epoch 1 step 204: training accuarcy: 0.7375
Epoch 1 step 204: training loss: 12683.758227596429
Epoch 1 step 205: training accuarcy: 0.7205
Epoch 1 step 205: training loss: 11635.734973726952
Epoch 1 step 206: training accuarcy: 0.7455
Epoch 1 step 206: training loss: 11720.208559566865
Epoch 1 step 207: training accuarcy: 0.7425
Epoch 1 step 207: training loss: 11517.361165653461
Epoch 1 step 208: training accuarcy: 0.7485
Epoch 1 step 208: training loss: 11466.36967889952
Epoch 1 step 209: training accuarcy: 0.75
Epoch 1 step 209: training loss: 12197.21269237869
Epoch 1 step 210: training accuarcy: 0.7315
Epoch 1 step 210: training loss: 12787.09473069

 40%|████████████████████████████████████████████████████████████████████▊                                                                                                       | 2/5 [04:24<06:38, 132.77s/it]

Epoch: 2
Epoch 2 step 230: training loss: 11629.131294079036
Epoch 2 step 231: training accuarcy: 0.745
Epoch 2 step 231: training loss: 11102.616532502521
Epoch 2 step 232: training accuarcy: 0.7565000000000001
Epoch 2 step 232: training loss: 11050.73239417948
Epoch 2 step 233: training accuarcy: 0.7585000000000001
Epoch 2 step 233: training loss: 11370.557011645189
Epoch 2 step 234: training accuarcy: 0.751
Epoch 2 step 234: training loss: 11305.943464709178
Epoch 2 step 235: training accuarcy: 0.754
Epoch 2 step 235: training loss: 11043.509212502488
Epoch 2 step 236: training accuarcy: 0.756
Epoch 2 step 236: training loss: 11468.931594084394
Epoch 2 step 237: training accuarcy: 0.748
Epoch 2 step 237: training loss: 10329.269461932836
Epoch 2 step 238: training accuarcy: 0.774
Epoch 2 step 238: training loss: 11019.339312782207
Epoch 2 step 239: training accuarcy: 0.758
Epoch 2 step 239: training loss: 10367.26482022112
Epoch 2 step 240: training accuarcy: 0.773
Epoch 2 step 240:

Epoch 2 step 313: training loss: 10080.891200254502
Epoch 2 step 314: training accuarcy: 0.7815
Epoch 2 step 314: training loss: 10682.52072657432
Epoch 2 step 315: training accuarcy: 0.765
Epoch 2 step 315: training loss: 10111.606203024674
Epoch 2 step 316: training accuarcy: 0.7815
Epoch 2 step 316: training loss: 10791.429917233505
Epoch 2 step 317: training accuarcy: 0.7655000000000001
Epoch 2 step 317: training loss: 10321.976269548792
Epoch 2 step 318: training accuarcy: 0.7765
Epoch 2 step 318: training loss: 9586.48477070473
Epoch 2 step 319: training accuarcy: 0.788
Epoch 2 step 319: training loss: 9838.553226716802
Epoch 2 step 320: training accuarcy: 0.7845
Epoch 2 step 320: training loss: 10219.140768721261
Epoch 2 step 321: training accuarcy: 0.7765
Epoch 2 step 321: training loss: 10073.206534736424
Epoch 2 step 322: training accuarcy: 0.779
Epoch 2 step 322: training loss: 10111.664680947912
Epoch 2 step 323: training accuarcy: 0.7795
Epoch 2 step 323: training loss: 10

 60%|███████████████████████████████████████████████████████████████████████████████████████████████████████▏                                                                    | 3/5 [06:36<04:24, 132.32s/it]

Epoch: 3
Epoch 3 step 345: training loss: 9760.933322306992
Epoch 3 step 346: training accuarcy: 0.787
Epoch 3 step 346: training loss: 9448.243592502238
Epoch 3 step 347: training accuarcy: 0.791
Epoch 3 step 347: training loss: 9636.268130945931
Epoch 3 step 348: training accuarcy: 0.7895
Epoch 3 step 348: training loss: 10207.790447511694
Epoch 3 step 349: training accuarcy: 0.7775
Epoch 3 step 349: training loss: 9604.834453351523
Epoch 3 step 350: training accuarcy: 0.7875
Epoch 3 step 350: training loss: 10439.717178448222
Epoch 3 step 351: training accuarcy: 0.771
Epoch 3 step 351: training loss: 10924.441802060215
Epoch 3 step 352: training accuarcy: 0.764
Epoch 3 step 352: training loss: 10000.348431087368
Epoch 3 step 353: training accuarcy: 0.781
Epoch 3 step 353: training loss: 10262.032013066551
Epoch 3 step 354: training accuarcy: 0.774
Epoch 3 step 354: training loss: 9683.6044542373
Epoch 3 step 355: training accuarcy: 0.7875
Epoch 3 step 355: training loss: 10094.85116

Epoch 3 step 430: training loss: 9085.143503805504
Epoch 3 step 431: training accuarcy: 0.8005
Epoch 3 step 431: training loss: 10509.581838001604
Epoch 3 step 432: training accuarcy: 0.7695
Epoch 3 step 432: training loss: 9690.06708341008
Epoch 3 step 433: training accuarcy: 0.7885
Epoch 3 step 433: training loss: 8915.362243324065
Epoch 3 step 434: training accuarcy: 0.804
Epoch 3 step 434: training loss: 10126.232865850454
Epoch 3 step 435: training accuarcy: 0.7805
Epoch 3 step 435: training loss: 9860.092504244798
Epoch 3 step 436: training accuarcy: 0.785
Epoch 3 step 436: training loss: 10704.553623837275
Epoch 3 step 437: training accuarcy: 0.766
Epoch 3 step 437: training loss: 10016.04075246828
Epoch 3 step 438: training accuarcy: 0.7805
Epoch 3 step 438: training loss: 10018.774946268815
Epoch 3 step 439: training accuarcy: 0.782
Epoch 3 step 439: training loss: 9679.979821717883
Epoch 3 step 440: training accuarcy: 0.7865
Epoch 3 step 440: training loss: 9787.607397030208


 80%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌                                  | 4/5 [08:47<02:11, 131.88s/it]

Epoch: 4
Epoch 4 step 460: training loss: 9552.878944924561
Epoch 4 step 461: training accuarcy: 0.792
Epoch 4 step 461: training loss: 9986.783699045245
Epoch 4 step 462: training accuarcy: 0.7825
Epoch 4 step 462: training loss: 9642.99674458777
Epoch 4 step 463: training accuarcy: 0.79
Epoch 4 step 463: training loss: 9734.836026068157
Epoch 4 step 464: training accuarcy: 0.7875
Epoch 4 step 464: training loss: 10924.522398426889
Epoch 4 step 465: training accuarcy: 0.762
Epoch 4 step 465: training loss: 10503.556585948145
Epoch 4 step 466: training accuarcy: 0.7705
Epoch 4 step 466: training loss: 11207.160836789406
Epoch 4 step 467: training accuarcy: 0.754
Epoch 4 step 467: training loss: 10174.808312177445
Epoch 4 step 468: training accuarcy: 0.7785
Epoch 4 step 468: training loss: 9971.036035489204
Epoch 4 step 469: training accuarcy: 0.7825
Epoch 4 step 469: training loss: 10161.360640843099
Epoch 4 step 470: training accuarcy: 0.779
Epoch 4 step 470: training loss: 9659.59430

Epoch 4 step 546: training loss: 8912.109444445876
Epoch 4 step 547: training accuarcy: 0.8025
Epoch 4 step 547: training loss: 8519.307486456983
Epoch 4 step 548: training accuarcy: 0.8150000000000001
Epoch 4 step 548: training loss: 8780.589234777886
Epoch 4 step 549: training accuarcy: 0.8075
Epoch 4 step 549: training loss: 8539.848307231936
Epoch 4 step 550: training accuarcy: 0.8150000000000001
Epoch 4 step 550: training loss: 8381.985335583606
Epoch 4 step 551: training accuarcy: 0.8165
Epoch 4 step 551: training loss: 9329.663722266509
Epoch 4 step 552: training accuarcy: 0.795
Epoch 4 step 552: training loss: 8474.959524523945
Epoch 4 step 553: training accuarcy: 0.8135
Epoch 4 step 553: training loss: 9089.199941436025
Epoch 4 step 554: training accuarcy: 0.8015
Epoch 4 step 554: training loss: 8301.212559230642
Epoch 4 step 555: training accuarcy: 0.8155
Epoch 4 step 555: training loss: 8034.299021788043
Epoch 4 step 556: training accuarcy: 0.8240000000000001
Epoch 4 step 55

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5/5 [10:57<00:00, 131.35s/it]


In [36]:
del prme_model
T.cuda.empty_cache()

### Train Trans FM Model

In [37]:
trans_model = TorchTransFM(feature_dim=feat_dim, num_dim=NUM_DIM, init_mean=INIT_MEAN)
trans_model

TorchTransFM()

In [38]:
adam_opt = optim.Adam(trans_model.parameters(), lr=LEARNING_RATE)
schedular = optim.lr_scheduler.StepLR(adam_opt,
                                      step_size=DECAY_FREQ,
                                      gamma=DECAY_GAMMA)

In [39]:
trans_learner = FMLearner(trans_model, adam_opt, schedular, db)
trans_learner

<models.fm_learner.FMLearner at 0x22d4b5b8fd0>

In [40]:
trans_learner.compile(train_col='base',
                   valid_col='seq',
                   test_col='seq',
                   loss_callback=simple_loss_callback)

In [43]:
trans_learner.fit(epoch=5,
                  log_dir=get_log_dir('seq_kaggle', 'trans'))


  0%|                                                                                                                                                                                     | 0/5 [00:00<?, ?it/s]

Epoch: 0
Epoch 0 step 0: training loss: 153.81764788398226
Epoch 0 step 1: training accuarcy: 0.995
Epoch 0 step 1: training loss: 153.3502213675638
Epoch 0 step 2: training accuarcy: 0.9955
Epoch 0 step 2: training loss: 142.38539766071315
Epoch 0 step 3: training accuarcy: 0.996
Epoch 0 step 3: training loss: 135.01801298832663
Epoch 0 step 4: training accuarcy: 0.9955
Epoch 0 step 4: training loss: 131.7688863472711
Epoch 0 step 5: training accuarcy: 0.996
Epoch 0 step 5: training loss: 132.4157538827452
Epoch 0 step 6: training accuarcy: 0.9965
Epoch 0 step 6: training loss: 123.51843116641881
Epoch 0 step 7: training accuarcy: 0.9975
Epoch 0 step 7: training loss: 118.70513388489516
Epoch 0 step 8: training accuarcy: 0.998
Epoch 0 step 8: training loss: 118.24749186504516
Epoch 0 step 9: training accuarcy: 0.996
Epoch 0 step 9: training loss: 107.89165459004366
Epoch 0 step 10: training accuarcy: 0.997
Epoch 0 step 10: training loss: 107.35216039381264
Epoch 0 step 11: training ac

Epoch 0 step 88: training loss: 32.12779268374444
Epoch 0 step 89: training accuarcy: 0.999
Epoch 0 step 89: training loss: 27.70145512093664
Epoch 0 step 90: training accuarcy: 0.9985
Epoch 0 step 90: training loss: 33.497832981892486
Epoch 0 step 91: training accuarcy: 0.9965
Epoch 0 step 91: training loss: 36.33263118412496
Epoch 0 step 92: training accuarcy: 0.9955
Epoch 0 step 92: training loss: 35.23590228541349
Epoch 0 step 93: training accuarcy: 0.997
Epoch 0 step 93: training loss: 32.932610709822924
Epoch 0 step 94: training accuarcy: 0.9965
Epoch 0 step 94: training loss: 30.61229098082528
Epoch 0 step 95: training accuarcy: 0.997
Epoch 0 step 95: training loss: 33.40140638897767
Epoch 0 step 96: training accuarcy: 0.996
Epoch 0 step 96: training loss: 31.66790289855498
Epoch 0 step 97: training accuarcy: 0.998
Epoch 0 step 97: training loss: 29.112478774440575
Epoch 0 step 98: training accuarcy: 0.997
Epoch 0 step 98: training loss: 31.252896332206937
Epoch 0 step 99: train


 20%|██████████████████████████████████▍                                                                                                                                         | 1/5 [02:10<08:42, 130.58s/it]

Epoch: 1
Epoch 1 step 115: training loss: 27.94524404971183
Epoch 1 step 116: training accuarcy: 0.9965
Epoch 1 step 116: training loss: 26.176073883566673
Epoch 1 step 117: training accuarcy: 0.9965
Epoch 1 step 117: training loss: 28.64687840467144
Epoch 1 step 118: training accuarcy: 0.996
Epoch 1 step 118: training loss: 25.866252479124793
Epoch 1 step 119: training accuarcy: 0.9985
Epoch 1 step 119: training loss: 29.08756442288909
Epoch 1 step 120: training accuarcy: 0.9965
Epoch 1 step 120: training loss: 24.305204493378948
Epoch 1 step 121: training accuarcy: 0.9985
Epoch 1 step 121: training loss: 26.91854683926728
Epoch 1 step 122: training accuarcy: 0.997
Epoch 1 step 122: training loss: 27.99515291316948
Epoch 1 step 123: training accuarcy: 0.9975
Epoch 1 step 123: training loss: 26.429686818546898
Epoch 1 step 124: training accuarcy: 0.9975
Epoch 1 step 124: training loss: 27.338528974329893
Epoch 1 step 125: training accuarcy: 0.9945
Epoch 1 step 125: training loss: 26.93

Epoch 1 step 201: training loss: 22.107143587360998
Epoch 1 step 202: training accuarcy: 0.998
Epoch 1 step 202: training loss: 27.576145452645388
Epoch 1 step 203: training accuarcy: 0.9965
Epoch 1 step 203: training loss: 16.740285130321112
Epoch 1 step 204: training accuarcy: 0.9995
Epoch 1 step 204: training loss: 14.743250460446168
Epoch 1 step 205: training accuarcy: 0.9985
Epoch 1 step 205: training loss: 23.96768711837344
Epoch 1 step 206: training accuarcy: 0.996
Epoch 1 step 206: training loss: 24.260272360780085
Epoch 1 step 207: training accuarcy: 0.9935
Epoch 1 step 207: training loss: 14.830631511003574
Epoch 1 step 208: training accuarcy: 0.9995
Epoch 1 step 208: training loss: 20.21085305106905
Epoch 1 step 209: training accuarcy: 0.997
Epoch 1 step 209: training loss: 16.87342789841559
Epoch 1 step 210: training accuarcy: 0.998
Epoch 1 step 210: training loss: 20.513912322785213
Epoch 1 step 211: training accuarcy: 0.9985
Epoch 1 step 211: training loss: 18.74890950300


 40%|████████████████████████████████████████████████████████████████████▊                                                                                                       | 2/5 [04:21<06:32, 130.77s/it]

Epoch: 2
Epoch 2 step 230: training loss: 19.788224119170835
Epoch 2 step 231: training accuarcy: 0.998
Epoch 2 step 231: training loss: 18.05255922345845
Epoch 2 step 232: training accuarcy: 0.9985
Epoch 2 step 232: training loss: 17.65970498124308
Epoch 2 step 233: training accuarcy: 0.997
Epoch 2 step 233: training loss: 19.39255872988128
Epoch 2 step 234: training accuarcy: 0.9975
Epoch 2 step 234: training loss: 19.222497901500784
Epoch 2 step 235: training accuarcy: 0.997
Epoch 2 step 235: training loss: 16.993338421121862
Epoch 2 step 236: training accuarcy: 0.9975
Epoch 2 step 236: training loss: 15.804012932007954
Epoch 2 step 237: training accuarcy: 0.998
Epoch 2 step 237: training loss: 16.343163992549893
Epoch 2 step 238: training accuarcy: 0.9985
Epoch 2 step 238: training loss: 17.081941861998363
Epoch 2 step 239: training accuarcy: 0.997
Epoch 2 step 239: training loss: 19.794798001702915
Epoch 2 step 240: training accuarcy: 0.996
Epoch 2 step 240: training loss: 14.4586

Epoch 2 step 316: training accuarcy: 0.998
Epoch 2 step 316: training loss: 12.668858818857784
Epoch 2 step 317: training accuarcy: 0.9985
Epoch 2 step 317: training loss: 20.069550704388085
Epoch 2 step 318: training accuarcy: 0.9965
Epoch 2 step 318: training loss: 12.565184571918888
Epoch 2 step 319: training accuarcy: 0.998
Epoch 2 step 319: training loss: 12.592458347097294
Epoch 2 step 320: training accuarcy: 0.999
Epoch 2 step 320: training loss: 13.4485345883825
Epoch 2 step 321: training accuarcy: 0.998
Epoch 2 step 321: training loss: 14.138688728826823
Epoch 2 step 322: training accuarcy: 0.998
Epoch 2 step 322: training loss: 13.662415232627428
Epoch 2 step 323: training accuarcy: 0.998
Epoch 2 step 323: training loss: 15.97185847483363
Epoch 2 step 324: training accuarcy: 0.997
Epoch 2 step 324: training loss: 16.729889885260768
Epoch 2 step 325: training accuarcy: 0.997
Epoch 2 step 325: training loss: 13.337306712327727
Epoch 2 step 326: training accuarcy: 0.998
Epoch 2 


 60%|███████████████████████████████████████████████████████████████████████████████████████████████████████▏                                                                    | 3/5 [06:31<04:20, 130.48s/it]

Epoch: 3
Epoch 3 step 345: training loss: 11.72107588466996
Epoch 3 step 346: training accuarcy: 0.9985
Epoch 3 step 346: training loss: 10.289452983420453
Epoch 3 step 347: training accuarcy: 1.0
Epoch 3 step 347: training loss: 13.199624716395519
Epoch 3 step 348: training accuarcy: 0.999
Epoch 3 step 348: training loss: 14.990211777485406
Epoch 3 step 349: training accuarcy: 0.9975
Epoch 3 step 349: training loss: 14.146641616669447
Epoch 3 step 350: training accuarcy: 0.9975
Epoch 3 step 350: training loss: 18.816216221961728
Epoch 3 step 351: training accuarcy: 0.997
Epoch 3 step 351: training loss: 17.90605565171613
Epoch 3 step 352: training accuarcy: 0.9975
Epoch 3 step 352: training loss: 9.952050833303637
Epoch 3 step 353: training accuarcy: 0.9985
Epoch 3 step 353: training loss: 13.586804121392897
Epoch 3 step 354: training accuarcy: 0.997
Epoch 3 step 354: training loss: 12.55182339200667
Epoch 3 step 355: training accuarcy: 0.998
Epoch 3 step 355: training loss: 9.6538997

Epoch 3 step 431: training loss: 9.883261932044903
Epoch 3 step 432: training accuarcy: 0.9985
Epoch 3 step 432: training loss: 8.459198941692854
Epoch 3 step 433: training accuarcy: 0.999
Epoch 3 step 433: training loss: 6.902177355061276
Epoch 3 step 434: training accuarcy: 0.9995
Epoch 3 step 434: training loss: 10.994764520863374
Epoch 3 step 435: training accuarcy: 0.9975
Epoch 3 step 435: training loss: 8.956970620722524
Epoch 3 step 436: training accuarcy: 0.999
Epoch 3 step 436: training loss: 8.378731230857799
Epoch 3 step 437: training accuarcy: 0.999
Epoch 3 step 437: training loss: 8.398814596461055
Epoch 3 step 438: training accuarcy: 0.9995
Epoch 3 step 438: training loss: 15.362607349522916
Epoch 3 step 439: training accuarcy: 0.9975
Epoch 3 step 439: training loss: 9.349815284527683
Epoch 3 step 440: training accuarcy: 0.999
Epoch 3 step 440: training loss: 12.837034702031705
Epoch 3 step 441: training accuarcy: 0.997
Epoch 3 step 441: training loss: 10.75415889724055
E


 80%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌                                  | 4/5 [08:46<02:11, 131.83s/it]

Epoch: 4
Epoch 4 step 460: training loss: 8.432320994336617
Epoch 4 step 461: training accuarcy: 0.9995
Epoch 4 step 461: training loss: 9.284593191006541
Epoch 4 step 462: training accuarcy: 1.0
Epoch 4 step 462: training loss: 10.931732098356909
Epoch 4 step 463: training accuarcy: 0.9985
Epoch 4 step 463: training loss: 11.738029457484958
Epoch 4 step 464: training accuarcy: 0.9975
Epoch 4 step 464: training loss: 9.58981203543564
Epoch 4 step 465: training accuarcy: 0.999
Epoch 4 step 465: training loss: 14.390284305256774
Epoch 4 step 466: training accuarcy: 0.998
Epoch 4 step 466: training loss: 12.3618691464983
Epoch 4 step 467: training accuarcy: 0.9985
Epoch 4 step 467: training loss: 12.046862264737557
Epoch 4 step 468: training accuarcy: 0.9975
Epoch 4 step 468: training loss: 15.023168105120284
Epoch 4 step 469: training accuarcy: 0.9975
Epoch 4 step 469: training loss: 12.644649974601462
Epoch 4 step 470: training accuarcy: 0.998
Epoch 4 step 470: training loss: 15.6319999

Epoch 4 step 546: training loss: 7.24948801900862
Epoch 4 step 547: training accuarcy: 1.0
Epoch 4 step 547: training loss: 7.474856767973147
Epoch 4 step 548: training accuarcy: 0.9995
Epoch 4 step 548: training loss: 9.683199161189027
Epoch 4 step 549: training accuarcy: 0.998
Epoch 4 step 549: training loss: 9.754227760133329
Epoch 4 step 550: training accuarcy: 0.9985
Epoch 4 step 550: training loss: 9.72762045074275
Epoch 4 step 551: training accuarcy: 0.998
Epoch 4 step 551: training loss: 6.614257625487344
Epoch 4 step 552: training accuarcy: 0.9985
Epoch 4 step 552: training loss: 10.99368814261671
Epoch 4 step 553: training accuarcy: 0.998
Epoch 4 step 553: training loss: 7.220322839003553
Epoch 4 step 554: training accuarcy: 0.9995
Epoch 4 step 554: training loss: 11.052163657422616
Epoch 4 step 555: training accuarcy: 0.9965
Epoch 4 step 555: training loss: 6.902849815632748
Epoch 4 step 556: training accuarcy: 0.9995
Epoch 4 step 556: training loss: 11.338319558346619
Epoch


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5/5 [10:58<00:00, 131.85s/it]