## Evaluating my KG on OGB dataset

In [13]:
import pandas as pd
import json
import numpy as np

import torch
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torch_geometric.utils import negative_sampling
import torch_geometric.transforms as T
from torch_geometric.nn import GCNConv, SAGEConv

from kg_model import KG_model

from ogb.linkproppred import Evaluator, PygLinkPropPredDataset

from pykeen.evaluation import RankBasedEvaluator
from pykeen.triples import TriplesFactory
from pykeen.pipeline import pipeline, pipeline_from_config 

### Load and prepare data

In [4]:
dataset = PygLinkPropPredDataset(name='ogbl-ddi', transform=T.ToSparseTensor())
data = dataset[0]
data.adj_t

SparseTensor(row=tensor([   0,    0,    0,  ..., 4266, 4266, 4266]),
             col=tensor([   4,    6,    7,  ..., 3953, 3972, 4014]),
             size=(4267, 4267), nnz=2135822, density=11.73%)

In [5]:
split_edge = dataset.get_edge_split()
train_edge, valid_edge, test_edge = split_edge["train"], split_edge["valid"], split_edge["test"]
train_edge

{'edge': tensor([[4039, 2424],
         [4039,  225],
         [4039, 3901],
         ...,
         [ 647,  708],
         [ 708,  338],
         [ 835, 3554]])}

In [6]:
print(valid_edge['edge_neg'].shape)
print(valid_edge['edge'].shape)

torch.Size([101882, 2])
torch.Size([133489, 2])


In [7]:
def convert_to_triples_factory(data):
    tf_data = TriplesFactory.from_labeled_triples(
        data[["head", "relation", "tail"]].values,
        create_inverse_triples=True,
        entity_to_id=None,
        relation_to_id=None,
        compact_id=False 
    )

    print(tf_data.mapped_triples)

    return tf_data

In [8]:
# add relation type - interacts with

train = train_edge['edge']
train = torch.tensor([[x[0], 0, x[1]] for x in train])
train_df = pd.DataFrame(train, columns=['head', 'relation', 'tail']).astype(str)

valid = valid_edge['edge']
valid = torch.tensor([[x[0], 0, x[1]] for x in valid])
valid_df = pd.DataFrame(valid, columns=['head', 'relation', 'tail']).astype(str)

valid_neg = valid_edge['edge_neg']
valid_neg = torch.tensor([[x[0], 0, x[1]] for x in valid_neg])

test = test_edge['edge']
test = torch.tensor([[x[0], 0, x[1]] for x in test])
test_df = pd.DataFrame(test, columns=['head', 'relation', 'tail']).astype(str)

test_neg = test_edge['edge_neg']
test_neg = torch.tensor([[x[0], 0, x[1]] for x in test_neg])

train_tf = convert_to_triples_factory(train_df)
valid_tf = convert_to_triples_factory(valid_df)
test_tf = convert_to_triples_factory(test_df)

tensor([[   0,    0,  667],
        [   0,    0, 1182],
        [   0,    0, 1280],
        ...,
        [4266,    0, 4250],
        [4266,    0, 4252],
        [4266,    0, 4260]])
tensor([[   0,    0,  729],
        [   1,    0,  681],
        [   1,    0,  768],
        ...,
        [3812,    0, 3722],
        [3812,    0, 3758],
        [3812,    0, 3802]])
tensor([[   0,    0,    3],
        [   0,    0,  185],
        [   0,    0,  187],
        ...,
        [1611,    0, 1562],
        [1611,    0, 1573],
        [1611,    0, 1601]])


In [9]:
# save dataset split to txt files

dir_data_my_split = 'dataset/ogbl_ddi-my_split/'

train_df.to_csv(dir_data_my_split + 'train.txt', sep='\t', header=False, index=False)
valid_df.to_csv(dir_data_my_split + 'valid.txt', sep='\t', header=False, index=False)
test_df.to_csv(dir_data_my_split + 'test.txt', sep='\t', header=False, index=False)

### Train my KG model

In [None]:
model_kg = KG_model('TransE', train_tf, valid_tf, test_tf, 'ogb')
model_kg.set_params(20, 'Adam', RankBasedEvaluator, 'gpu')
print('Training...')
model_kg.train()
print('Training done')

In [17]:
config = {
    'metadata': dict(
        title='TransE model - best config'
    ),
    'pipeline': dict(
        training = train_tf,
        validation = valid_tf,
        testing = test_tf,
        model='TransE',
        model_kwargs=dict(
               embedding_dim=150,
        ),
        optimizer='Adam',
        optimizer_kwargs=dict(lr=0.001),
        loss='marginranking',
        loss_kwargs=dict(margin=1.3),
        training_loop='slcwa',
        training_kwargs=dict(num_epochs=100, batch_size=32),
        negative_sampler='basic',
        negative_sampler_kwargs=dict(num_negs_per_pos=9),
        evaluator='rankbased',
        evaluator_kwargs=dict(filtered=True),
        evaluation_kwargs=dict(batch_size=32),
    )
}

In [18]:
pipeline_result = pipeline_from_config(config)

No random seed is specified. Setting to 1166338746.
INFO:pykeen.triples.triples_factory:Creating inverse triples.
Training epochs on cuda:0:   0%|                     | 0/100 [00:00<?, ?epoch/s]INFO:pykeen.triples.triples_factory:Creating inverse triples.

Training batches on cuda:0:   0%|                  | 0/66745 [00:00<?, ?batch/s][A
Training batches on cuda:0:   0%|        | 12/66745 [00:00<09:17, 119.61batch/s][A
Training batches on cuda:0:   0%|        | 60/66745 [00:00<03:21, 330.40batch/s][A
Training batches on cuda:0:   0%|       | 113/66745 [00:00<02:39, 418.32batch/s][A
Training batches on cuda:0:   0%|       | 166/66745 [00:00<02:24, 461.92batch/s][A
Training batches on cuda:0:   0%|       | 219/66745 [00:00<02:17, 483.19batch/s][A
Training batches on cuda:0:   0%|       | 276/66745 [00:00<02:09, 511.36batch/s][A
Training batches on cuda:0:   1%|       | 335/66745 [00:00<02:03, 536.57batch/s][A
Training batches on cuda:0:   1%|       | 395/66745 [00:00<01:59, 554.0

Training batches on cuda:0:   8%|▍     | 5499/66745 [00:09<01:43, 592.53batch/s][A
Training batches on cuda:0:   8%|▍     | 5559/66745 [00:09<01:43, 593.11batch/s][A
Training batches on cuda:0:   8%|▌     | 5619/66745 [00:09<01:43, 592.44batch/s][A
Training batches on cuda:0:   9%|▌     | 5679/66745 [00:09<01:43, 592.19batch/s][A
Training batches on cuda:0:   9%|▌     | 5739/66745 [00:09<01:43, 592.14batch/s][A
Training batches on cuda:0:   9%|▌     | 5799/66745 [00:10<01:42, 592.06batch/s][A
Training batches on cuda:0:   9%|▌     | 5859/66745 [00:10<01:42, 592.50batch/s][A
Training batches on cuda:0:   9%|▌     | 5919/66745 [00:10<01:42, 592.28batch/s][A
Training batches on cuda:0:   9%|▌     | 5979/66745 [00:10<01:46, 571.11batch/s][A
Training batches on cuda:0:   9%|▌     | 6037/66745 [00:10<01:49, 556.03batch/s][A
Training batches on cuda:0:   9%|▌     | 6093/66745 [00:10<01:51, 545.67batch/s][A
Training batches on cuda:0:   9%|▌     | 6153/66745 [00:10<01:48, 559.16batc

Training batches on cuda:0:  17%|▊    | 11230/66745 [00:19<01:38, 563.51batch/s][A
Training batches on cuda:0:  17%|▊    | 11292/66745 [00:19<01:35, 577.76batch/s][A
Training batches on cuda:0:  17%|▊    | 11350/66745 [00:19<01:36, 574.76batch/s][A
Training batches on cuda:0:  17%|▊    | 11409/66745 [00:19<01:35, 577.28batch/s][A
Training batches on cuda:0:  17%|▊    | 11469/66745 [00:19<01:35, 581.11batch/s][A
Training batches on cuda:0:  17%|▊    | 11528/66745 [00:20<01:35, 580.97batch/s][A
Training batches on cuda:0:  17%|▊    | 11587/66745 [00:20<01:34, 581.04batch/s][A
Training batches on cuda:0:  17%|▊    | 11647/66745 [00:20<01:34, 584.56batch/s][A
Training batches on cuda:0:  18%|▉    | 11707/66745 [00:20<01:33, 587.30batch/s][A
Training batches on cuda:0:  18%|▉    | 11766/66745 [00:20<01:33, 587.53batch/s][A
Training batches on cuda:0:  18%|▉    | 11827/66745 [00:20<01:32, 593.79batch/s][A
Training batches on cuda:0:  18%|▉    | 11887/66745 [00:20<01:32, 595.29batc

Training batches on cuda:0:  25%|█▎   | 16963/66745 [00:29<01:25, 584.28batch/s][A
Training batches on cuda:0:  26%|█▎   | 17022/66745 [00:29<01:24, 585.65batch/s][A
Training batches on cuda:0:  26%|█▎   | 17081/66745 [00:29<01:24, 586.80batch/s][A
Training batches on cuda:0:  26%|█▎   | 17140/66745 [00:29<01:24, 587.43batch/s][A
Training batches on cuda:0:  26%|█▎   | 17199/66745 [00:29<01:25, 579.16batch/s][A
Training batches on cuda:0:  26%|█▎   | 17259/66745 [00:29<01:24, 583.97batch/s][A
Training batches on cuda:0:  26%|█▎   | 17319/66745 [00:30<01:24, 586.79batch/s][A
Training batches on cuda:0:  26%|█▎   | 17379/66745 [00:30<01:23, 588.62batch/s][A
Training batches on cuda:0:  26%|█▎   | 17439/66745 [00:30<01:23, 591.67batch/s][A
Training batches on cuda:0:  26%|█▎   | 17499/66745 [00:30<01:22, 593.33batch/s][A
Training batches on cuda:0:  26%|█▎   | 17559/66745 [00:30<01:23, 592.07batch/s][A
Training batches on cuda:0:  26%|█▎   | 17619/66745 [00:30<01:23, 590.75batc

Training batches on cuda:0:  34%|█▋   | 22689/66745 [00:39<01:15, 585.71batch/s][A
Training batches on cuda:0:  34%|█▋   | 22750/66745 [00:39<01:14, 591.71batch/s][A
Training batches on cuda:0:  34%|█▋   | 22811/66745 [00:39<01:13, 596.06batch/s][A
Training batches on cuda:0:  34%|█▋   | 22871/66745 [00:39<01:14, 590.60batch/s][A
Training batches on cuda:0:  34%|█▋   | 22931/66745 [00:39<01:17, 566.63batch/s][A
Training batches on cuda:0:  34%|█▋   | 22991/66745 [00:39<01:16, 574.97batch/s][A
Training batches on cuda:0:  35%|█▋   | 23049/66745 [00:39<01:17, 560.97batch/s][A
Training batches on cuda:0:  35%|█▋   | 23106/66745 [00:40<01:18, 558.53batch/s][A
Training batches on cuda:0:  35%|█▋   | 23164/66745 [00:40<01:17, 563.40batch/s][A
Training batches on cuda:0:  35%|█▋   | 23222/66745 [00:40<01:16, 567.61batch/s][A
Training batches on cuda:0:  35%|█▋   | 23281/66745 [00:40<01:15, 573.21batch/s][A
Training batches on cuda:0:  35%|█▋   | 23342/66745 [00:40<01:14, 584.04batc

Training batches on cuda:0:  43%|██▏  | 28403/66745 [00:49<01:05, 582.91batch/s][A
Training batches on cuda:0:  43%|██▏  | 28462/66745 [00:49<01:05, 581.67batch/s][A
Training batches on cuda:0:  43%|██▏  | 28521/66745 [00:49<01:05, 583.74batch/s][A
Training batches on cuda:0:  43%|██▏  | 28580/66745 [00:49<01:07, 565.85batch/s][A
Training batches on cuda:0:  43%|██▏  | 28637/66745 [00:49<01:07, 565.19batch/s][A
Training batches on cuda:0:  43%|██▏  | 28694/66745 [00:49<01:08, 554.90batch/s][A
Training batches on cuda:0:  43%|██▏  | 28750/66745 [00:49<01:09, 544.75batch/s][A
Training batches on cuda:0:  43%|██▏  | 28805/66745 [00:50<01:09, 545.22batch/s][A
Training batches on cuda:0:  43%|██▏  | 28864/66745 [00:50<01:08, 555.80batch/s][A
Training batches on cuda:0:  43%|██▏  | 28924/66745 [00:50<01:06, 567.54batch/s][A
Training batches on cuda:0:  43%|██▏  | 28981/66745 [00:50<01:06, 565.00batch/s][A
Training batches on cuda:0:  44%|██▏  | 29038/66745 [00:50<01:08, 550.78batc

Training batches on cuda:0:  51%|██▌  | 34083/66745 [00:59<00:59, 549.59batch/s][A
Training batches on cuda:0:  51%|██▌  | 34144/66745 [00:59<00:57, 565.75batch/s][A
Training batches on cuda:0:  51%|██▌  | 34203/66745 [00:59<00:56, 572.60batch/s][A
Training batches on cuda:0:  51%|██▌  | 34264/66745 [00:59<00:55, 582.25batch/s][A
Training batches on cuda:0:  51%|██▌  | 34323/66745 [00:59<00:56, 573.35batch/s][A
Training batches on cuda:0:  52%|██▌  | 34381/66745 [00:59<00:56, 570.85batch/s][A
Training batches on cuda:0:  52%|██▌  | 34440/66745 [00:59<00:56, 573.74batch/s][A
Training batches on cuda:0:  52%|██▌  | 34498/66745 [00:59<00:56, 572.20batch/s][A
Training batches on cuda:0:  52%|██▌  | 34557/66745 [01:00<00:55, 574.86batch/s][A
Training batches on cuda:0:  52%|██▌  | 34615/66745 [01:00<00:56, 571.95batch/s][A
Training batches on cuda:0:  52%|██▌  | 34673/66745 [01:00<00:56, 569.28batch/s][A
Training batches on cuda:0:  52%|██▌  | 34732/66745 [01:00<00:55, 572.93batc

Training batches on cuda:0:  60%|██▉  | 39826/66745 [01:09<00:45, 587.78batch/s][A
Training batches on cuda:0:  60%|██▉  | 39886/66745 [01:09<00:45, 588.92batch/s][A
Training batches on cuda:0:  60%|██▉  | 39946/66745 [01:09<00:45, 589.58batch/s][A
Training batches on cuda:0:  60%|██▉  | 40005/66745 [01:09<00:46, 571.44batch/s][A
Training batches on cuda:0:  60%|███  | 40063/66745 [01:09<00:46, 568.24batch/s][A
Training batches on cuda:0:  60%|███  | 40122/66745 [01:09<00:46, 574.05batch/s][A
Training batches on cuda:0:  60%|███  | 40181/66745 [01:09<00:45, 578.33batch/s][A
Training batches on cuda:0:  60%|███  | 40240/66745 [01:09<00:45, 580.53batch/s][A
Training batches on cuda:0:  60%|███  | 40299/66745 [01:09<00:45, 582.65batch/s][A
Training batches on cuda:0:  60%|███  | 40358/66745 [01:10<00:45, 584.30batch/s][A
Training batches on cuda:0:  61%|███  | 40417/66745 [01:10<00:44, 585.57batch/s][A
Training batches on cuda:0:  61%|███  | 40476/66745 [01:10<00:44, 586.64batc

Training batches on cuda:0:  68%|███▍ | 45539/66745 [01:18<00:36, 576.47batch/s][A
Training batches on cuda:0:  68%|███▍ | 45597/66745 [01:19<00:37, 567.04batch/s][A
Training batches on cuda:0:  68%|███▍ | 45654/66745 [01:19<00:37, 560.43batch/s][A
Training batches on cuda:0:  68%|███▍ | 45716/66745 [01:19<00:36, 575.15batch/s][A
Training batches on cuda:0:  69%|███▍ | 45774/66745 [01:19<00:37, 564.82batch/s][A
Training batches on cuda:0:  69%|███▍ | 45831/66745 [01:19<00:37, 559.14batch/s][A
Training batches on cuda:0:  69%|███▍ | 45887/66745 [01:19<00:37, 559.12batch/s][A
Training batches on cuda:0:  69%|███▍ | 45944/66745 [01:19<00:37, 559.86batch/s][A
Training batches on cuda:0:  69%|███▍ | 46001/66745 [01:19<00:36, 561.82batch/s][A
Training batches on cuda:0:  69%|███▍ | 46058/66745 [01:19<00:36, 559.73batch/s][A
Training batches on cuda:0:  69%|███▍ | 46115/66745 [01:20<00:36, 562.04batch/s][A
Training batches on cuda:0:  69%|███▍ | 46176/66745 [01:20<00:35, 575.06batc

Training batches on cuda:0:  77%|███▊ | 51229/66745 [01:28<00:27, 565.66batch/s][A
Training batches on cuda:0:  77%|███▊ | 51286/66745 [01:28<00:27, 560.36batch/s][A
Training batches on cuda:0:  77%|███▊ | 51345/66745 [01:29<00:27, 568.49batch/s][A
Training batches on cuda:0:  77%|███▊ | 51402/66745 [01:29<00:27, 566.05batch/s][A
Training batches on cuda:0:  77%|███▊ | 51462/66745 [01:29<00:26, 573.39batch/s][A
Training batches on cuda:0:  77%|███▊ | 51520/66745 [01:29<00:26, 569.21batch/s][A
Training batches on cuda:0:  77%|███▊ | 51577/66745 [01:29<00:26, 567.88batch/s][A
Training batches on cuda:0:  77%|███▊ | 51636/66745 [01:29<00:26, 572.00batch/s][A
Training batches on cuda:0:  77%|███▊ | 51695/66745 [01:29<00:26, 575.35batch/s][A
Training batches on cuda:0:  78%|███▉ | 51754/66745 [01:29<00:25, 577.38batch/s][A
Training batches on cuda:0:  78%|███▉ | 51813/66745 [01:29<00:25, 577.60batch/s][A
Training batches on cuda:0:  78%|███▉ | 51872/66745 [01:30<00:25, 580.53batc

Training batches on cuda:0:  85%|████▎| 56960/66745 [01:38<00:16, 588.31batch/s][A
Training batches on cuda:0:  85%|████▎| 57020/66745 [01:38<00:16, 588.92batch/s][A
Training batches on cuda:0:  86%|████▎| 57079/66745 [01:39<00:16, 589.17batch/s][A
Training batches on cuda:0:  86%|████▎| 57139/66745 [01:39<00:16, 589.44batch/s][A
Training batches on cuda:0:  86%|████▎| 57199/66745 [01:39<00:16, 590.02batch/s][A
Training batches on cuda:0:  86%|████▎| 57259/66745 [01:39<00:16, 589.63batch/s][A
Training batches on cuda:0:  86%|████▎| 57318/66745 [01:39<00:16, 583.20batch/s][A
Training batches on cuda:0:  86%|████▎| 57377/66745 [01:39<00:16, 577.35batch/s][A
Training batches on cuda:0:  86%|████▎| 57436/66745 [01:39<00:16, 579.09batch/s][A
Training batches on cuda:0:  86%|████▎| 57496/66745 [01:39<00:15, 583.31batch/s][A
Training batches on cuda:0:  86%|████▎| 57556/66745 [01:39<00:15, 586.50batch/s][A
Training batches on cuda:0:  86%|████▎| 57615/66745 [01:39<00:15, 582.41batc

Training batches on cuda:0:  94%|████▋| 62682/66745 [01:48<00:07, 564.28batch/s][A
Training batches on cuda:0:  94%|████▋| 62739/66745 [01:48<00:07, 563.68batch/s][A
Training batches on cuda:0:  94%|████▋| 62796/66745 [01:48<00:07, 560.96batch/s][A
Training batches on cuda:0:  94%|████▋| 62853/66745 [01:49<00:06, 557.72batch/s][A
Training batches on cuda:0:  94%|████▋| 62913/66745 [01:49<00:06, 568.30batch/s][A
Training batches on cuda:0:  94%|████▋| 62975/66745 [01:49<00:06, 581.80batch/s][A
Training batches on cuda:0:  94%|████▋| 63034/66745 [01:49<00:06, 582.20batch/s][A
Training batches on cuda:0:  95%|████▋| 63093/66745 [01:49<00:06, 582.42batch/s][A
Training batches on cuda:0:  95%|████▋| 63152/66745 [01:49<00:06, 582.59batch/s][A
Training batches on cuda:0:  95%|████▋| 63211/66745 [01:49<00:06, 582.67batch/s][A
Training batches on cuda:0:  95%|████▋| 63270/66745 [01:49<00:05, 582.80batch/s][A
Training batches on cuda:0:  95%|████▋| 63329/66745 [01:49<00:05, 582.37batc

Training batches on cuda:0:   2%|▏     | 1521/66745 [00:02<01:52, 577.93batch/s][A
Training batches on cuda:0:   2%|▏     | 1579/66745 [00:02<01:53, 574.82batch/s][A
Training batches on cuda:0:   2%|▏     | 1637/66745 [00:02<01:53, 571.56batch/s][A
Training batches on cuda:0:   3%|▏     | 1695/66745 [00:03<01:54, 569.61batch/s][A
Training batches on cuda:0:   3%|▏     | 1754/66745 [00:03<01:53, 574.46batch/s][A
Training batches on cuda:0:   3%|▏     | 1814/66745 [00:03<01:52, 579.62batch/s][A
Training batches on cuda:0:   3%|▏     | 1872/66745 [00:03<01:52, 578.07batch/s][A
Training batches on cuda:0:   3%|▏     | 1930/66745 [00:03<01:53, 572.90batch/s][A
Training batches on cuda:0:   3%|▏     | 1990/66745 [00:03<01:51, 578.52batch/s][A
Training batches on cuda:0:   3%|▏     | 2049/66745 [00:03<01:51, 581.59batch/s][A
Training batches on cuda:0:   3%|▏     | 2108/66745 [00:03<01:52, 573.95batch/s][A
Training batches on cuda:0:   3%|▏     | 2166/66745 [00:03<01:54, 566.28batc

Training batches on cuda:0:  11%|▋     | 7143/66745 [00:12<01:45, 564.62batch/s][A
Training batches on cuda:0:  11%|▋     | 7201/66745 [00:12<01:44, 569.03batch/s][A
Training batches on cuda:0:  11%|▋     | 7259/66745 [00:12<01:44, 571.87batch/s][A
Training batches on cuda:0:  11%|▋     | 7317/66745 [00:13<01:44, 569.98batch/s][A
Training batches on cuda:0:  11%|▋     | 7375/66745 [00:13<01:46, 559.97batch/s][A
Training batches on cuda:0:  11%|▋     | 7432/66745 [00:13<01:46, 555.92batch/s][A
Training batches on cuda:0:  11%|▋     | 7489/66745 [00:13<01:46, 558.83batch/s][A
Training batches on cuda:0:  11%|▋     | 7546/66745 [00:13<01:45, 559.15batch/s][A
Training batches on cuda:0:  11%|▋     | 7606/66745 [00:13<01:43, 570.05batch/s][A
Training batches on cuda:0:  11%|▋     | 7665/66745 [00:13<01:42, 575.22batch/s][A
Training batches on cuda:0:  12%|▋     | 7724/66745 [00:13<01:42, 578.21batch/s][A
Training batches on cuda:0:  12%|▋     | 7782/66745 [00:13<01:41, 578.17batc

Training batches on cuda:0:  19%|▉    | 12769/66745 [00:22<01:35, 567.97batch/s][A
Training batches on cuda:0:  19%|▉    | 12828/66745 [00:22<01:33, 574.22batch/s][A
Training batches on cuda:0:  19%|▉    | 12887/66745 [00:22<01:33, 577.88batch/s][A
Training batches on cuda:0:  19%|▉    | 12945/66745 [00:22<01:33, 574.52batch/s][A
Training batches on cuda:0:  19%|▉    | 13004/66745 [00:23<01:33, 577.28batch/s][A
Training batches on cuda:0:  20%|▉    | 13063/66745 [00:23<01:32, 579.16batch/s][A
Training batches on cuda:0:  20%|▉    | 13121/66745 [00:23<01:35, 564.20batch/s][A
Training batches on cuda:0:  20%|▉    | 13181/66745 [00:23<01:33, 572.05batch/s][A
Training batches on cuda:0:  20%|▉    | 13239/66745 [00:23<01:35, 558.37batch/s][A
Training batches on cuda:0:  20%|▉    | 13298/66745 [00:23<01:34, 564.95batch/s][A
Training batches on cuda:0:  20%|█    | 13356/66745 [00:23<01:34, 565.59batch/s][A
Training batches on cuda:0:  20%|█    | 13416/66745 [00:23<01:33, 573.31batc

Training batches on cuda:0:  28%|█▍   | 18433/66745 [00:32<01:22, 584.43batch/s][A
Training batches on cuda:0:  28%|█▍   | 18492/66745 [00:32<01:22, 585.69batch/s][A
Training batches on cuda:0:  28%|█▍   | 18551/66745 [00:32<01:22, 586.40batch/s][A
Training batches on cuda:0:  28%|█▍   | 18610/66745 [00:32<01:22, 586.76batch/s][A
Training batches on cuda:0:  28%|█▍   | 18669/66745 [00:33<01:23, 574.10batch/s][A
Training batches on cuda:0:  28%|█▍   | 18727/66745 [00:33<01:24, 567.71batch/s][A
Training batches on cuda:0:  28%|█▍   | 18784/66745 [00:33<01:25, 562.31batch/s][A
Training batches on cuda:0:  28%|█▍   | 18843/66745 [00:33<01:24, 568.51batch/s][A
Training batches on cuda:0:  28%|█▍   | 18901/66745 [00:33<01:23, 571.03batch/s][A
Training batches on cuda:0:  28%|█▍   | 18959/66745 [00:33<01:23, 572.68batch/s][A
Training batches on cuda:0:  28%|█▍   | 19017/66745 [00:33<01:24, 563.82batch/s][A
Training batches on cuda:0:  29%|█▍   | 19076/66745 [00:33<01:23, 570.41batc

Training batches on cuda:0:  36%|█▊   | 24075/66745 [00:42<01:13, 577.40batch/s][A
Training batches on cuda:0:  36%|█▊   | 24133/66745 [00:42<01:14, 569.79batch/s][A
Training batches on cuda:0:  36%|█▊   | 24191/66745 [00:42<01:15, 564.55batch/s][A
Training batches on cuda:0:  36%|█▊   | 24248/66745 [00:42<01:15, 562.09batch/s][A
Training batches on cuda:0:  36%|█▊   | 24305/66745 [00:42<01:16, 554.08batch/s][A
Training batches on cuda:0:  36%|█▊   | 24361/66745 [00:43<01:17, 547.06batch/s][A
Training batches on cuda:0:  37%|█▊   | 24421/66745 [00:43<01:15, 559.86batch/s][A
Training batches on cuda:0:  37%|█▊   | 24478/66745 [00:43<01:16, 556.08batch/s][A
Training batches on cuda:0:  37%|█▊   | 24536/66745 [00:43<01:15, 561.16batch/s][A
Training batches on cuda:0:  37%|█▊   | 24594/66745 [00:43<01:14, 565.29batch/s][A
Training batches on cuda:0:  37%|█▊   | 24652/66745 [00:43<01:14, 567.30batch/s][A
Training batches on cuda:0:  37%|█▊   | 24709/66745 [00:43<01:14, 565.20batc

Training batches on cuda:0:  45%|██▏  | 29742/66745 [00:52<01:03, 580.67batch/s][A
Training batches on cuda:0:  45%|██▏  | 29801/66745 [00:52<01:03, 582.42batch/s][A
Training batches on cuda:0:  45%|██▏  | 29860/66745 [00:52<01:03, 583.00batch/s][A
Training batches on cuda:0:  45%|██▏  | 29919/66745 [00:52<01:03, 583.98batch/s][A
Training batches on cuda:0:  45%|██▏  | 29978/66745 [00:52<01:02, 585.19batch/s][A
Training batches on cuda:0:  45%|██▎  | 30037/66745 [00:52<01:02, 585.41batch/s][A
Training batches on cuda:0:  45%|██▎  | 30096/66745 [00:53<01:02, 585.07batch/s][A
Training batches on cuda:0:  45%|██▎  | 30155/66745 [00:53<01:03, 574.50batch/s][A
Training batches on cuda:0:  45%|██▎  | 30213/66745 [00:53<01:04, 565.57batch/s][A
Training batches on cuda:0:  45%|██▎  | 30270/66745 [00:53<01:04, 562.44batch/s][A
Training batches on cuda:0:  45%|██▎  | 30329/66745 [00:53<01:03, 569.39batch/s][A
Training batches on cuda:0:  46%|██▎  | 30388/66745 [00:53<01:03, 574.63batc

Training batches on cuda:0:  53%|██▋  | 35388/66745 [01:02<00:54, 578.50batch/s][A
Training batches on cuda:0:  53%|██▋  | 35447/66745 [01:02<00:53, 579.62batch/s][A
Training batches on cuda:0:  53%|██▋  | 35506/66745 [01:02<00:53, 580.49batch/s][A
Training batches on cuda:0:  53%|██▋  | 35565/66745 [01:02<00:53, 581.44batch/s][A
Training batches on cuda:0:  53%|██▋  | 35625/66745 [01:02<00:53, 584.18batch/s][A
Training batches on cuda:0:  53%|██▋  | 35684/66745 [01:02<00:53, 584.21batch/s][A
Training batches on cuda:0:  54%|██▋  | 35743/66745 [01:03<00:53, 584.73batch/s][A
Training batches on cuda:0:  54%|██▋  | 35802/66745 [01:03<00:53, 581.41batch/s][A
Training batches on cuda:0:  54%|██▋  | 35861/66745 [01:03<00:53, 580.44batch/s][A
Training batches on cuda:0:  54%|██▋  | 35920/66745 [01:03<00:53, 580.27batch/s][A
Training batches on cuda:0:  54%|██▋  | 35979/66745 [01:03<00:54, 565.96batch/s][A
Training batches on cuda:0:  54%|██▋  | 36036/66745 [01:03<00:55, 557.57batc

Training batches on cuda:0:  61%|███  | 41010/66745 [01:12<00:46, 547.88batch/s][A
Training batches on cuda:0:  62%|███  | 41065/66745 [01:12<00:47, 544.70batch/s][A
Training batches on cuda:0:  62%|███  | 41120/66745 [01:12<00:47, 543.11batch/s][A
Training batches on cuda:0:  62%|███  | 41178/66745 [01:12<00:46, 553.31batch/s][A
Training batches on cuda:0:  62%|███  | 41236/66745 [01:12<00:45, 560.69batch/s][A
Training batches on cuda:0:  62%|███  | 41293/66745 [01:12<00:46, 548.42batch/s][A
Training batches on cuda:0:  62%|███  | 41348/66745 [01:12<00:46, 543.78batch/s][A
Training batches on cuda:0:  62%|███  | 41405/66745 [01:13<00:46, 549.01batch/s][A
Training batches on cuda:0:  62%|███  | 41460/66745 [01:13<00:46, 545.75batch/s][A
Training batches on cuda:0:  62%|███  | 41515/66745 [01:13<00:46, 543.61batch/s][A
Training batches on cuda:0:  62%|███  | 41570/66745 [01:13<00:46, 542.86batch/s][A
Training batches on cuda:0:  62%|███  | 41627/66745 [01:13<00:45, 548.48batc

Training batches on cuda:0:  70%|███▌ | 46754/66745 [01:22<00:33, 589.88batch/s][A
Training batches on cuda:0:  70%|███▌ | 46814/66745 [01:22<00:33, 590.76batch/s][A
Training batches on cuda:0:  70%|███▌ | 46874/66745 [01:22<00:33, 586.98batch/s][A
Training batches on cuda:0:  70%|███▌ | 46934/66745 [01:22<00:33, 589.13batch/s][A
Training batches on cuda:0:  70%|███▌ | 46994/66745 [01:22<00:33, 590.78batch/s][A
Training batches on cuda:0:  70%|███▌ | 47054/66745 [01:22<00:33, 590.85batch/s][A
Training batches on cuda:0:  71%|███▌ | 47114/66745 [01:22<00:34, 576.93batch/s][A
Training batches on cuda:0:  71%|███▌ | 47172/66745 [01:22<00:34, 564.77batch/s][A
Training batches on cuda:0:  71%|███▌ | 47229/66745 [01:23<00:34, 562.57batch/s][A
Training batches on cuda:0:  71%|███▌ | 47287/66745 [01:23<00:34, 565.14batch/s][A
Training batches on cuda:0:  71%|███▌ | 47344/66745 [01:23<00:34, 554.44batch/s][A
Training batches on cuda:0:  71%|███▌ | 47400/66745 [01:23<00:35, 548.83batc

Training batches on cuda:0:  79%|███▉ | 52536/66745 [01:32<00:24, 589.55batch/s][A
Training batches on cuda:0:  79%|███▉ | 52595/66745 [01:32<00:24, 588.54batch/s][A
Training batches on cuda:0:  79%|███▉ | 52654/66745 [01:32<00:23, 588.78batch/s][A
Training batches on cuda:0:  79%|███▉ | 52714/66745 [01:32<00:23, 590.27batch/s][A
Training batches on cuda:0:  79%|███▉ | 52774/66745 [01:32<00:23, 591.56batch/s][A
Training batches on cuda:0:  79%|███▉ | 52834/66745 [01:32<00:23, 591.80batch/s][A
Training batches on cuda:0:  79%|███▉ | 52894/66745 [01:32<00:23, 592.11batch/s][A
Training batches on cuda:0:  79%|███▉ | 52954/66745 [01:32<00:23, 592.02batch/s][A
Training batches on cuda:0:  79%|███▉ | 53014/66745 [01:32<00:23, 592.04batch/s][A
Training batches on cuda:0:  80%|███▉ | 53074/66745 [01:33<00:23, 592.40batch/s][A
Training batches on cuda:0:  80%|███▉ | 53134/66745 [01:33<00:23, 590.77batch/s][A
Training batches on cuda:0:  80%|███▉ | 53194/66745 [01:33<00:22, 589.83batc

Training batches on cuda:0:  87%|████▎| 58271/66745 [01:41<00:14, 575.68batch/s][A
Training batches on cuda:0:  87%|████▎| 58331/66745 [01:42<00:14, 580.15batch/s][A
Training batches on cuda:0:  87%|████▎| 58390/66745 [01:42<00:14, 580.52batch/s][A
Training batches on cuda:0:  88%|████▍| 58449/66745 [01:42<00:14, 574.21batch/s][A
Training batches on cuda:0:  88%|████▍| 58509/66745 [01:42<00:14, 580.91batch/s][A
Training batches on cuda:0:  88%|████▍| 58569/66745 [01:42<00:13, 584.13batch/s][A
Training batches on cuda:0:  88%|████▍| 58629/66745 [01:42<00:13, 586.55batch/s][A
Training batches on cuda:0:  88%|████▍| 58689/66745 [01:42<00:13, 588.05batch/s][A
Training batches on cuda:0:  88%|████▍| 58749/66745 [01:42<00:13, 589.05batch/s][A
Training batches on cuda:0:  88%|████▍| 58808/66745 [01:42<00:13, 575.54batch/s][A
Training batches on cuda:0:  88%|████▍| 58867/66745 [01:43<00:13, 579.38batch/s][A
Training batches on cuda:0:  88%|████▍| 58926/66745 [01:43<00:13, 579.92batc

Training batches on cuda:0:  96%|████▊| 63932/66745 [01:51<00:04, 575.53batch/s][A
Training batches on cuda:0:  96%|████▊| 63990/66745 [01:52<00:04, 573.15batch/s][A
Training batches on cuda:0:  96%|████▊| 64048/66745 [01:52<00:04, 571.73batch/s][A
Training batches on cuda:0:  96%|████▊| 64107/66745 [01:52<00:04, 575.51batch/s][A
Training batches on cuda:0:  96%|████▊| 64168/66745 [01:52<00:04, 582.94batch/s][A
Training batches on cuda:0:  96%|████▊| 64227/66745 [01:52<00:04, 574.25batch/s][A
Training batches on cuda:0:  96%|████▊| 64286/66745 [01:52<00:04, 578.37batch/s][A
Training batches on cuda:0:  96%|████▊| 64344/66745 [01:52<00:04, 569.86batch/s][A
Training batches on cuda:0:  96%|████▊| 64402/66745 [01:52<00:04, 566.06batch/s][A
Training batches on cuda:0:  97%|████▊| 64459/66745 [01:52<00:04, 563.78batch/s][A
Training batches on cuda:0:  97%|████▊| 64516/66745 [01:52<00:03, 561.88batch/s][A
Training batches on cuda:0:  97%|████▊| 64573/66745 [01:53<00:03, 550.57batc

Training batches on cuda:0:   4%|▏     | 2618/66745 [00:04<02:02, 523.18batch/s][A
Training batches on cuda:0:   4%|▏     | 2671/66745 [00:04<02:02, 522.02batch/s][A
Training batches on cuda:0:   4%|▏     | 2724/66745 [00:05<02:02, 521.74batch/s][A
Training batches on cuda:0:   4%|▏     | 2777/66745 [00:05<02:02, 521.49batch/s][A
Training batches on cuda:0:   4%|▎     | 2830/66745 [00:05<02:03, 517.30batch/s][A
Training batches on cuda:0:   4%|▎     | 2883/66745 [00:05<02:03, 519.19batch/s][A
Training batches on cuda:0:   4%|▎     | 2937/66745 [00:05<02:01, 524.63batch/s][A
Training batches on cuda:0:   4%|▎     | 2991/66745 [00:05<02:00, 528.86batch/s][A
Training batches on cuda:0:   5%|▎     | 3047/66745 [00:05<01:58, 537.17batch/s][A
Training batches on cuda:0:   5%|▎     | 3106/66745 [00:05<01:55, 551.37batch/s][A
Training batches on cuda:0:   5%|▎     | 3165/66745 [00:05<01:52, 562.69batch/s][A
Training batches on cuda:0:   5%|▎     | 3224/66745 [00:05<01:51, 569.69batc

Training batches on cuda:0:  12%|▋     | 8324/66745 [00:14<01:49, 534.50batch/s][A
Training batches on cuda:0:  13%|▊     | 8378/66745 [00:14<01:49, 533.87batch/s][A
Training batches on cuda:0:  13%|▊     | 8432/66745 [00:14<01:49, 534.15batch/s][A
Training batches on cuda:0:  13%|▊     | 8486/66745 [00:14<01:48, 535.86batch/s][A
Training batches on cuda:0:  13%|▊     | 8541/66745 [00:15<01:47, 539.79batch/s][A
Training batches on cuda:0:  13%|▊     | 8599/66745 [00:15<01:45, 550.09batch/s][A
Training batches on cuda:0:  13%|▊     | 8658/66745 [00:15<01:43, 561.28batch/s][A
Training batches on cuda:0:  13%|▊     | 8717/66745 [00:15<01:42, 568.84batch/s][A
Training batches on cuda:0:  13%|▊     | 8776/66745 [00:15<01:41, 573.64batch/s][A
Training batches on cuda:0:  13%|▊     | 8835/66745 [00:15<01:40, 577.52batch/s][A
Training batches on cuda:0:  13%|▊     | 8895/66745 [00:15<01:39, 581.56batch/s][A
Training batches on cuda:0:  13%|▊     | 8954/66745 [00:15<01:39, 583.43batc

Training batches on cuda:0:  21%|█    | 14073/66745 [00:24<01:29, 585.63batch/s][A
Training batches on cuda:0:  21%|█    | 14132/66745 [00:24<01:29, 586.39batch/s][A
Training batches on cuda:0:  21%|█    | 14191/66745 [00:24<01:30, 582.60batch/s][A
Training batches on cuda:0:  21%|█    | 14250/66745 [00:24<01:29, 584.48batch/s][A
Training batches on cuda:0:  21%|█    | 14309/66745 [00:24<01:29, 585.56batch/s][A
Training batches on cuda:0:  22%|█    | 14369/66745 [00:25<01:29, 588.12batch/s][A
Training batches on cuda:0:  22%|█    | 14430/66745 [00:25<01:27, 594.64batch/s][A
Training batches on cuda:0:  22%|█    | 14492/66745 [00:25<01:27, 599.76batch/s][A
Training batches on cuda:0:  22%|█    | 14552/66745 [00:25<01:29, 585.16batch/s][A
Training batches on cuda:0:  22%|█    | 14611/66745 [00:25<01:29, 585.72batch/s][A
Training batches on cuda:0:  22%|█    | 14670/66745 [00:25<01:28, 586.43batch/s][A
Training batches on cuda:0:  22%|█    | 14729/66745 [00:25<01:28, 584.96batc

Training batches on cuda:0:  30%|█▍   | 19850/66745 [00:34<01:22, 570.85batch/s][A
Training batches on cuda:0:  30%|█▍   | 19908/66745 [00:34<01:22, 566.58batch/s][A
Training batches on cuda:0:  30%|█▍   | 19965/66745 [00:34<01:22, 563.84batch/s][A
Training batches on cuda:0:  30%|█▌   | 20025/66745 [00:34<01:21, 573.14batch/s][A
Training batches on cuda:0:  30%|█▌   | 20083/66745 [00:34<01:21, 574.95batch/s][A
Training batches on cuda:0:  30%|█▌   | 20141/66745 [00:34<01:21, 574.44batch/s][A
Training batches on cuda:0:  30%|█▌   | 20199/66745 [00:34<01:22, 566.45batch/s][A
Training batches on cuda:0:  30%|█▌   | 20258/66745 [00:35<01:21, 572.21batch/s][A
Training batches on cuda:0:  30%|█▌   | 20318/66745 [00:35<01:20, 580.25batch/s][A
Training batches on cuda:0:  31%|█▌   | 20380/66745 [00:35<01:18, 590.47batch/s][A
Training batches on cuda:0:  31%|█▌   | 20441/66745 [00:35<01:17, 594.62batch/s][A
Training batches on cuda:0:  31%|█▌   | 20501/66745 [00:35<01:17, 592.99batc

Training batches on cuda:0:  38%|█▉   | 25656/66745 [00:44<01:12, 569.79batch/s][A
Training batches on cuda:0:  39%|█▉   | 25716/66745 [00:44<01:11, 575.80batch/s][A
Training batches on cuda:0:  39%|█▉   | 25776/66745 [00:44<01:10, 580.07batch/s][A
Training batches on cuda:0:  39%|█▉   | 25836/66745 [00:44<01:10, 583.24batch/s][A
Training batches on cuda:0:  39%|█▉   | 25896/66745 [00:44<01:09, 585.96batch/s][A
Training batches on cuda:0:  39%|█▉   | 25956/66745 [00:44<01:09, 587.70batch/s][A
Training batches on cuda:0:  39%|█▉   | 26016/66745 [00:44<01:09, 589.34batch/s][A
Training batches on cuda:0:  39%|█▉   | 26075/66745 [00:44<01:09, 589.17batch/s][A
Training batches on cuda:0:  39%|█▉   | 26134/66745 [00:45<01:09, 588.52batch/s][A
Training batches on cuda:0:  39%|█▉   | 26193/66745 [00:45<01:08, 587.76batch/s][A
Training batches on cuda:0:  39%|█▉   | 26252/66745 [00:45<01:08, 587.91batch/s][A
Training batches on cuda:0:  39%|█▉   | 26311/66745 [00:45<01:08, 588.33batc

Training batches on cuda:0:  47%|██▎  | 31426/66745 [00:54<01:02, 569.25batch/s][A
Training batches on cuda:0:  47%|██▎  | 31483/66745 [00:54<01:02, 561.56batch/s][A
Training batches on cuda:0:  47%|██▎  | 31540/66745 [00:54<01:03, 555.50batch/s][A
Training batches on cuda:0:  47%|██▎  | 31596/66745 [00:54<01:03, 551.07batch/s][A
Training batches on cuda:0:  47%|██▎  | 31656/66745 [00:54<01:02, 562.71batch/s][A
Training batches on cuda:0:  48%|██▍  | 31715/66745 [00:54<01:01, 570.38batch/s][A
Training batches on cuda:0:  48%|██▍  | 31773/66745 [00:54<01:01, 571.07batch/s][A
Training batches on cuda:0:  48%|██▍  | 31832/66745 [00:54<01:00, 575.98batch/s][A
Training batches on cuda:0:  48%|██▍  | 31891/66745 [00:54<01:00, 579.61batch/s][A
Training batches on cuda:0:  48%|██▍  | 31951/66745 [00:55<00:59, 582.72batch/s][A
Training batches on cuda:0:  48%|██▍  | 32010/66745 [00:55<00:59, 584.36batch/s][A
Training batches on cuda:0:  48%|██▍  | 32069/66745 [00:55<00:59, 585.58batc

Training batches on cuda:0:  56%|██▊  | 37190/66745 [01:03<00:50, 582.28batch/s][A
Training batches on cuda:0:  56%|██▊  | 37249/66745 [01:04<00:50, 581.54batch/s][A
Training batches on cuda:0:  56%|██▊  | 37309/66745 [01:04<00:50, 584.49batch/s][A
Training batches on cuda:0:  56%|██▊  | 37368/66745 [01:04<00:50, 585.75batch/s][A
Training batches on cuda:0:  56%|██▊  | 37428/66745 [01:04<00:49, 587.05batch/s][A
Training batches on cuda:0:  56%|██▊  | 37488/66745 [01:04<00:49, 587.98batch/s][A
Training batches on cuda:0:  56%|██▊  | 37547/66745 [01:04<00:50, 580.42batch/s][A
Training batches on cuda:0:  56%|██▊  | 37606/66745 [01:04<00:50, 580.09batch/s][A
Training batches on cuda:0:  56%|██▊  | 37665/66745 [01:04<00:50, 580.44batch/s][A
Training batches on cuda:0:  57%|██▊  | 37724/66745 [01:04<00:49, 581.04batch/s][A
Training batches on cuda:0:  57%|██▊  | 37783/66745 [01:04<00:49, 581.43batch/s][A
Training batches on cuda:0:  57%|██▊  | 37842/66745 [01:05<00:49, 581.72batc

Training batches on cuda:0:  64%|███▏ | 42915/66745 [01:13<00:42, 555.69batch/s][A
Training batches on cuda:0:  64%|███▏ | 42973/66745 [01:13<00:42, 562.24batch/s][A
Training batches on cuda:0:  64%|███▏ | 43032/66745 [01:14<00:41, 568.55batch/s][A
Training batches on cuda:0:  65%|███▏ | 43091/66745 [01:14<00:41, 572.87batch/s][A
Training batches on cuda:0:  65%|███▏ | 43150/66745 [01:14<00:40, 575.84batch/s][A
Training batches on cuda:0:  65%|███▏ | 43209/66745 [01:14<00:40, 578.21batch/s][A
Training batches on cuda:0:  65%|███▏ | 43268/66745 [01:14<00:40, 580.07batch/s][A
Training batches on cuda:0:  65%|███▏ | 43327/66745 [01:14<00:40, 581.19batch/s][A
Training batches on cuda:0:  65%|███▎ | 43386/66745 [01:14<00:40, 581.99batch/s][A
Training batches on cuda:0:  65%|███▎ | 43445/66745 [01:14<00:40, 577.48batch/s][A
Training batches on cuda:0:  65%|███▎ | 43503/66745 [01:14<00:40, 574.71batch/s][A
Training batches on cuda:0:  65%|███▎ | 43561/66745 [01:14<00:41, 564.34batc

Training batches on cuda:0:  73%|███▋ | 48628/66745 [01:23<00:31, 577.23batch/s][A
Training batches on cuda:0:  73%|███▋ | 48687/66745 [01:23<00:31, 578.74batch/s][A
Training batches on cuda:0:  73%|███▋ | 48746/66745 [01:23<00:31, 580.07batch/s][A
Training batches on cuda:0:  73%|███▋ | 48805/66745 [01:24<00:31, 572.72batch/s][A
Training batches on cuda:0:  73%|███▋ | 48863/66745 [01:24<00:31, 568.97batch/s][A
Training batches on cuda:0:  73%|███▋ | 48920/66745 [01:24<00:31, 566.29batch/s][A
Training batches on cuda:0:  73%|███▋ | 48978/66745 [01:24<00:31, 569.35batch/s][A
Training batches on cuda:0:  73%|███▋ | 49036/66745 [01:24<00:30, 571.64batch/s][A
Training batches on cuda:0:  74%|███▋ | 49094/66745 [01:24<00:30, 574.01batch/s][A
Training batches on cuda:0:  74%|███▋ | 49152/66745 [01:24<00:30, 575.27batch/s][A
Training batches on cuda:0:  74%|███▋ | 49210/66745 [01:24<00:30, 575.68batch/s][A
Training batches on cuda:0:  74%|███▋ | 49268/66745 [01:24<00:30, 575.60batc

Training batches on cuda:0:  81%|████ | 54323/66745 [01:33<00:22, 556.32batch/s][A
Training batches on cuda:0:  81%|████ | 54379/66745 [01:33<00:22, 551.07batch/s][A
Training batches on cuda:0:  82%|████ | 54435/66745 [01:33<00:22, 546.20batch/s][A
Training batches on cuda:0:  82%|████ | 54490/66745 [01:33<00:22, 541.29batch/s][A
Training batches on cuda:0:  82%|████ | 54545/66745 [01:34<00:22, 537.31batch/s][A
Training batches on cuda:0:  82%|████ | 54599/66745 [01:34<00:22, 535.54batch/s][A
Training batches on cuda:0:  82%|████ | 54653/66745 [01:34<00:22, 531.61batch/s][A
Training batches on cuda:0:  82%|████ | 54707/66745 [01:34<00:22, 525.32batch/s][A
Training batches on cuda:0:  82%|████ | 54764/66745 [01:34<00:22, 535.72batch/s][A
Training batches on cuda:0:  82%|████ | 54823/66745 [01:34<00:21, 549.95batch/s][A
Training batches on cuda:0:  82%|████ | 54882/66745 [01:34<00:21, 560.75batch/s][A
Training batches on cuda:0:  82%|████ | 54941/66745 [01:34<00:20, 567.84batc

Training batches on cuda:0:  90%|████▍| 59959/66745 [01:43<00:11, 578.92batch/s][A
Training batches on cuda:0:  90%|████▍| 60017/66745 [01:43<00:11, 567.82batch/s][A
Training batches on cuda:0:  90%|████▌| 60074/66745 [01:43<00:11, 563.54batch/s][A
Training batches on cuda:0:  90%|████▌| 60131/66745 [01:43<00:11, 557.83batch/s][A
Training batches on cuda:0:  90%|████▌| 60190/66745 [01:43<00:11, 565.69batch/s][A
Training batches on cuda:0:  90%|████▌| 60248/66745 [01:44<00:11, 569.74batch/s][A
Training batches on cuda:0:  90%|████▌| 60307/66745 [01:44<00:11, 573.85batch/s][A
Training batches on cuda:0:  90%|████▌| 60366/66745 [01:44<00:11, 577.02batch/s][A
Training batches on cuda:0:  91%|████▌| 60425/66745 [01:44<00:10, 579.53batch/s][A
Training batches on cuda:0:  91%|████▌| 60484/66745 [01:44<00:10, 581.21batch/s][A
Training batches on cuda:0:  91%|████▌| 60543/66745 [01:44<00:10, 582.27batch/s][A
Training batches on cuda:0:  91%|████▌| 60602/66745 [01:44<00:10, 571.28batc

Training batches on cuda:0:  98%|████▉| 65580/66745 [01:53<00:02, 551.77batch/s][A
Training batches on cuda:0:  98%|████▉| 65636/66745 [01:53<00:02, 548.79batch/s][A
Training batches on cuda:0:  98%|████▉| 65695/66745 [01:53<00:01, 558.18batch/s][A
Training batches on cuda:0:  99%|████▉| 65754/66745 [01:53<00:01, 564.97batch/s][A
Training batches on cuda:0:  99%|████▉| 65811/66745 [01:53<00:01, 565.05batch/s][A
Training batches on cuda:0:  99%|████▉| 65868/66745 [01:54<00:01, 556.03batch/s][A
Training batches on cuda:0:  99%|████▉| 65927/66745 [01:54<00:01, 563.43batch/s][A
Training batches on cuda:0:  99%|████▉| 65984/66745 [01:54<00:01, 564.84batch/s][A
Training batches on cuda:0:  99%|████▉| 66041/66745 [01:54<00:01, 565.18batch/s][A
Training batches on cuda:0:  99%|████▉| 66102/66745 [01:54<00:01, 576.17batch/s][A
Training batches on cuda:0:  99%|████▉| 66161/66745 [01:54<00:01, 579.70batch/s][A
Training batches on cuda:0:  99%|████▉| 66220/66745 [01:54<00:00, 582.59batc

Training batches on cuda:0:   7%|▍     | 4388/66745 [00:07<01:50, 566.42batch/s][A
Training batches on cuda:0:   7%|▍     | 4447/66745 [00:07<01:48, 573.03batch/s][A
Training batches on cuda:0:   7%|▍     | 4505/66745 [00:07<01:48, 574.96batch/s][A
Training batches on cuda:0:   7%|▍     | 4563/66745 [00:07<01:48, 572.20batch/s][A
Training batches on cuda:0:   7%|▍     | 4625/66745 [00:08<01:46, 584.29batch/s][A
Training batches on cuda:0:   7%|▍     | 4687/66745 [00:08<01:44, 592.30batch/s][A
Training batches on cuda:0:   7%|▍     | 4747/66745 [00:08<01:45, 587.16batch/s][A
Training batches on cuda:0:   7%|▍     | 4806/66745 [00:08<01:45, 587.17batch/s][A
Training batches on cuda:0:   7%|▍     | 4865/66745 [00:08<01:46, 579.12batch/s][A
Training batches on cuda:0:   7%|▍     | 4924/66745 [00:08<01:46, 581.37batch/s][A
Training batches on cuda:0:   7%|▍     | 4986/66745 [00:08<01:44, 590.12batch/s][A
Training batches on cuda:0:   8%|▍     | 5047/66745 [00:08<01:43, 595.21batc

Training batches on cuda:0:  15%|▊    | 10143/66745 [00:17<01:37, 578.65batch/s][A
Training batches on cuda:0:  15%|▊    | 10204/66745 [00:17<01:36, 586.58batch/s][A
Training batches on cuda:0:  15%|▊    | 10263/66745 [00:17<01:37, 578.00batch/s][A
Training batches on cuda:0:  15%|▊    | 10323/66745 [00:17<01:36, 582.12batch/s][A
Training batches on cuda:0:  16%|▊    | 10382/66745 [00:17<01:36, 583.21batch/s][A
Training batches on cuda:0:  16%|▊    | 10441/66745 [00:18<01:38, 573.62batch/s][A
Training batches on cuda:0:  16%|▊    | 10499/66745 [00:18<01:38, 572.12batch/s][A
Training batches on cuda:0:  16%|▊    | 10559/66745 [00:18<01:37, 578.31batch/s][A
Training batches on cuda:0:  16%|▊    | 10619/66745 [00:18<01:36, 582.74batch/s][A
Training batches on cuda:0:  16%|▊    | 10679/66745 [00:18<01:35, 586.18batch/s][A
Training batches on cuda:0:  16%|▊    | 10739/66745 [00:18<01:35, 588.45batch/s][A
Training batches on cuda:0:  16%|▊    | 10799/66745 [00:18<01:34, 589.12batc

Training batches on cuda:0:  24%|█▏   | 15909/66745 [00:27<01:26, 590.87batch/s][A
Training batches on cuda:0:  24%|█▏   | 15969/66745 [00:27<01:26, 589.24batch/s][A
Training batches on cuda:0:  24%|█▏   | 16028/66745 [00:27<01:28, 574.40batch/s][A
Training batches on cuda:0:  24%|█▏   | 16086/66745 [00:27<01:28, 571.34batch/s][A
Training batches on cuda:0:  24%|█▏   | 16145/66745 [00:27<01:27, 575.63batch/s][A
Training batches on cuda:0:  24%|█▏   | 16204/66745 [00:27<01:27, 578.59batch/s][A
Training batches on cuda:0:  24%|█▏   | 16262/66745 [00:28<01:27, 574.08batch/s][A
Training batches on cuda:0:  24%|█▏   | 16320/66745 [00:28<01:28, 568.30batch/s][A
Training batches on cuda:0:  25%|█▏   | 16379/66745 [00:28<01:27, 573.50batch/s][A
Training batches on cuda:0:  25%|█▏   | 16438/66745 [00:28<01:27, 575.65batch/s][A
Training batches on cuda:0:  25%|█▏   | 16496/66745 [00:28<01:28, 564.62batch/s][A
Training batches on cuda:0:  25%|█▏   | 16555/66745 [00:28<01:28, 569.46batc

Training batches on cuda:0:  32%|█▌   | 21653/66745 [00:37<01:17, 584.63batch/s][A
Training batches on cuda:0:  33%|█▋   | 21712/66745 [00:37<01:16, 585.47batch/s][A
Training batches on cuda:0:  33%|█▋   | 21771/66745 [00:37<01:16, 586.72batch/s][A
Training batches on cuda:0:  33%|█▋   | 21830/66745 [00:37<01:16, 587.15batch/s][A
Training batches on cuda:0:  33%|█▋   | 21889/66745 [00:37<01:16, 587.66batch/s][A
Training batches on cuda:0:  33%|█▋   | 21948/66745 [00:37<01:16, 585.05batch/s][A
Training batches on cuda:0:  33%|█▋   | 22008/66745 [00:37<01:15, 588.69batch/s][A
Training batches on cuda:0:  33%|█▋   | 22068/66745 [00:38<01:15, 590.66batch/s][A
Training batches on cuda:0:  33%|█▋   | 22128/66745 [00:38<01:15, 592.34batch/s][A
Training batches on cuda:0:  33%|█▋   | 22188/66745 [00:38<01:15, 593.57batch/s][A
Training batches on cuda:0:  33%|█▋   | 22248/66745 [00:38<01:15, 592.48batch/s][A
Training batches on cuda:0:  33%|█▋   | 22308/66745 [00:38<01:15, 591.68batc

Training batches on cuda:0:  41%|██   | 27335/66745 [00:47<01:07, 586.07batch/s][A
Training batches on cuda:0:  41%|██   | 27394/66745 [00:47<01:07, 586.83batch/s][A
Training batches on cuda:0:  41%|██   | 27453/66745 [00:47<01:06, 587.21batch/s][A
Training batches on cuda:0:  41%|██   | 27512/66745 [00:47<01:06, 586.93batch/s][A
Training batches on cuda:0:  41%|██   | 27571/66745 [00:47<01:06, 586.32batch/s][A
Training batches on cuda:0:  41%|██   | 27630/66745 [00:47<01:07, 581.59batch/s][A
Training batches on cuda:0:  41%|██   | 27689/66745 [00:47<01:06, 583.25batch/s][A
Training batches on cuda:0:  42%|██   | 27748/66745 [00:47<01:06, 584.48batch/s][A
Training batches on cuda:0:  42%|██   | 27807/66745 [00:47<01:06, 585.00batch/s][A
Training batches on cuda:0:  42%|██   | 27866/66745 [00:48<01:06, 585.40batch/s][A
Training batches on cuda:0:  42%|██   | 27925/66745 [00:48<01:06, 585.48batch/s][A
Training batches on cuda:0:  42%|██   | 27984/66745 [00:48<01:06, 586.27batc

Training batches on cuda:0:  50%|██▍  | 33069/66745 [00:57<00:58, 579.84batch/s][A
Training batches on cuda:0:  50%|██▍  | 33128/66745 [00:57<00:57, 581.09batch/s][A
Training batches on cuda:0:  50%|██▍  | 33187/66745 [00:57<00:58, 576.33batch/s][A
Training batches on cuda:0:  50%|██▍  | 33245/66745 [00:57<00:58, 577.13batch/s][A
Training batches on cuda:0:  50%|██▍  | 33304/66745 [00:57<00:57, 578.19batch/s][A
Training batches on cuda:0:  50%|██▍  | 33363/66745 [00:57<00:57, 578.93batch/s][A
Training batches on cuda:0:  50%|██▌  | 33421/66745 [00:57<00:57, 575.62batch/s][A
Training batches on cuda:0:  50%|██▌  | 33479/66745 [00:57<00:57, 576.88batch/s][A
Training batches on cuda:0:  50%|██▌  | 33537/66745 [00:57<00:57, 573.43batch/s][A
Training batches on cuda:0:  50%|██▌  | 33596/66745 [00:57<00:57, 575.59batch/s][A
Training batches on cuda:0:  50%|██▌  | 33655/66745 [00:58<00:57, 576.99batch/s][A
Training batches on cuda:0:  51%|██▌  | 33714/66745 [00:58<00:57, 578.01batc

Training batches on cuda:0:  58%|██▉  | 38804/66745 [01:06<00:48, 578.69batch/s][A
Training batches on cuda:0:  58%|██▉  | 38862/66745 [01:06<00:48, 578.13batch/s][A
Training batches on cuda:0:  58%|██▉  | 38920/66745 [01:07<00:48, 578.12batch/s][A
Training batches on cuda:0:  58%|██▉  | 38978/66745 [01:07<00:48, 578.36batch/s][A
Training batches on cuda:0:  58%|██▉  | 39037/66745 [01:07<00:47, 578.92batch/s][A
Training batches on cuda:0:  59%|██▉  | 39095/66745 [01:07<00:47, 578.51batch/s][A
Training batches on cuda:0:  59%|██▉  | 39153/66745 [01:07<00:47, 578.32batch/s][A
Training batches on cuda:0:  59%|██▉  | 39211/66745 [01:07<00:47, 578.41batch/s][A
Training batches on cuda:0:  59%|██▉  | 39269/66745 [01:07<00:47, 578.55batch/s][A
Training batches on cuda:0:  59%|██▉  | 39327/66745 [01:07<00:47, 578.84batch/s][A
Training batches on cuda:0:  59%|██▉  | 39385/66745 [01:07<00:47, 578.31batch/s][A
Training batches on cuda:0:  59%|██▉  | 39443/66745 [01:07<00:47, 573.60batc

Training batches on cuda:0:  67%|███▎ | 44495/66745 [01:16<00:38, 579.23batch/s][A
Training batches on cuda:0:  67%|███▎ | 44554/66745 [01:16<00:38, 579.81batch/s][A
Training batches on cuda:0:  67%|███▎ | 44615/66745 [01:16<00:37, 586.01batch/s][A
Training batches on cuda:0:  67%|███▎ | 44674/66745 [01:17<00:37, 582.74batch/s][A
Training batches on cuda:0:  67%|███▎ | 44733/66745 [01:17<00:37, 579.95batch/s][A
Training batches on cuda:0:  67%|███▎ | 44792/66745 [01:17<00:38, 571.34batch/s][A
Training batches on cuda:0:  67%|███▎ | 44850/66745 [01:17<00:38, 566.27batch/s][A
Training batches on cuda:0:  67%|███▎ | 44907/66745 [01:17<00:38, 564.63batch/s][A
Training batches on cuda:0:  67%|███▎ | 44966/66745 [01:17<00:38, 571.96batch/s][A
Training batches on cuda:0:  67%|███▎ | 45026/66745 [01:17<00:37, 578.51batch/s][A
Training batches on cuda:0:  68%|███▍ | 45085/66745 [01:17<00:37, 579.26batch/s][A
Training batches on cuda:0:  68%|███▍ | 45143/66745 [01:17<00:38, 565.11batc

Training batches on cuda:0:  75%|███▊ | 50133/66745 [01:26<00:28, 572.87batch/s][A
Training batches on cuda:0:  75%|███▊ | 50192/66745 [01:26<00:28, 577.64batch/s][A
Training batches on cuda:0:  75%|███▊ | 50251/66745 [01:26<00:28, 581.11batch/s][A
Training batches on cuda:0:  75%|███▊ | 50311/66745 [01:27<00:28, 585.14batch/s][A
Training batches on cuda:0:  75%|███▊ | 50371/66745 [01:27<00:27, 588.38batch/s][A
Training batches on cuda:0:  76%|███▊ | 50430/66745 [01:27<00:28, 579.63batch/s][A
Training batches on cuda:0:  76%|███▊ | 50489/66745 [01:27<00:28, 574.44batch/s][A
Training batches on cuda:0:  76%|███▊ | 50547/66745 [01:27<00:28, 566.87batch/s][A
Training batches on cuda:0:  76%|███▊ | 50605/66745 [01:27<00:28, 568.84batch/s][A
Training batches on cuda:0:  76%|███▊ | 50663/66745 [01:27<00:28, 570.74batch/s][A
Training batches on cuda:0:  76%|███▊ | 50721/66745 [01:27<00:28, 570.09batch/s][A
Training batches on cuda:0:  76%|███▊ | 50779/66745 [01:27<00:28, 566.02batc

Training batches on cuda:0:  84%|████▏| 55800/66745 [01:36<00:18, 578.32batch/s][A
Training batches on cuda:0:  84%|████▏| 55859/66745 [01:36<00:18, 580.20batch/s][A
Training batches on cuda:0:  84%|████▏| 55918/66745 [01:36<00:18, 581.67batch/s][A
Training batches on cuda:0:  84%|████▏| 55977/66745 [01:36<00:18, 582.90batch/s][A
Training batches on cuda:0:  84%|████▏| 56036/66745 [01:37<00:18, 583.59batch/s][A
Training batches on cuda:0:  84%|████▏| 56095/66745 [01:37<00:18, 584.14batch/s][A
Training batches on cuda:0:  84%|████▏| 56154/66745 [01:37<00:18, 584.67batch/s][A
Training batches on cuda:0:  84%|████▏| 56213/66745 [01:37<00:18, 585.11batch/s][A
Training batches on cuda:0:  84%|████▏| 56272/66745 [01:37<00:18, 573.76batch/s][A
Training batches on cuda:0:  84%|████▏| 56330/66745 [01:37<00:18, 569.65batch/s][A
Training batches on cuda:0:  84%|████▏| 56388/66745 [01:37<00:18, 559.00batch/s][A
Training batches on cuda:0:  85%|████▏| 56446/66745 [01:37<00:18, 564.12batc

Training batches on cuda:0:  92%|████▌| 61490/66745 [01:46<00:09, 562.10batch/s][A
Training batches on cuda:0:  92%|████▌| 61547/66745 [01:46<00:09, 560.45batch/s][A
Training batches on cuda:0:  92%|████▌| 61604/66745 [01:46<00:09, 558.61batch/s][A
Training batches on cuda:0:  92%|████▌| 61660/66745 [01:46<00:09, 557.32batch/s][A
Training batches on cuda:0:  92%|████▌| 61716/66745 [01:46<00:09, 554.71batch/s][A
Training batches on cuda:0:  93%|████▋| 61772/66745 [01:47<00:09, 551.21batch/s][A
Training batches on cuda:0:  93%|████▋| 61828/66745 [01:47<00:08, 549.11batch/s][A
Training batches on cuda:0:  93%|████▋| 61883/66745 [01:47<00:08, 547.28batch/s][A
Training batches on cuda:0:  93%|████▋| 61941/66745 [01:47<00:08, 554.33batch/s][A
Training batches on cuda:0:  93%|████▋| 61997/66745 [01:47<00:08, 547.96batch/s][A
Training batches on cuda:0:  93%|████▋| 62055/66745 [01:47<00:08, 556.57batch/s][A
Training batches on cuda:0:  93%|████▋| 62111/66745 [01:47<00:08, 554.62batc

Training batches on cuda:0:   0%|       | 182/66745 [00:00<02:13, 497.35batch/s][A
Training batches on cuda:0:   0%|       | 240/66745 [00:00<02:06, 526.41batch/s][A
Training batches on cuda:0:   0%|       | 299/66745 [00:00<02:01, 544.81batch/s][A
Training batches on cuda:0:   1%|       | 357/66745 [00:00<01:59, 554.89batch/s][A
Training batches on cuda:0:   1%|       | 414/66745 [00:00<01:58, 559.22batch/s][A
Training batches on cuda:0:   1%|       | 471/66745 [00:00<01:57, 562.46batch/s][A
Training batches on cuda:0:   1%|       | 529/66745 [00:01<01:57, 564.91batch/s][A
Training batches on cuda:0:   1%|       | 586/66745 [00:01<01:56, 565.80batch/s][A
Training batches on cuda:0:   1%|       | 644/66745 [00:01<01:56, 567.25batch/s][A
Training batches on cuda:0:   1%|       | 701/66745 [00:01<01:56, 567.87batch/s][A
Training batches on cuda:0:   1%|       | 758/66745 [00:01<01:56, 568.26batch/s][A
Training batches on cuda:0:   1%|       | 816/66745 [00:01<01:55, 568.97batc

Training batches on cuda:0:   9%|▌     | 5843/66745 [00:10<01:47, 564.37batch/s][A
Training batches on cuda:0:   9%|▌     | 5900/66745 [00:10<01:49, 555.93batch/s][A
Training batches on cuda:0:   9%|▌     | 5956/66745 [00:10<01:49, 557.08batch/s][A
Training batches on cuda:0:   9%|▌     | 6017/66745 [00:10<01:46, 570.18batch/s][A
Training batches on cuda:0:   9%|▌     | 6076/66745 [00:10<01:45, 573.89batch/s][A
Training batches on cuda:0:   9%|▌     | 6136/66745 [00:10<01:44, 580.75batch/s][A
Training batches on cuda:0:   9%|▌     | 6195/66745 [00:10<01:46, 570.68batch/s][A
Training batches on cuda:0:   9%|▌     | 6255/66745 [00:10<01:44, 578.16batch/s][A
Training batches on cuda:0:   9%|▌     | 6315/66745 [00:11<01:43, 582.70batch/s][A
Training batches on cuda:0:  10%|▌     | 6374/66745 [00:11<01:43, 583.78batch/s][A
Training batches on cuda:0:  10%|▌     | 6433/66745 [00:11<01:43, 584.59batch/s][A
Training batches on cuda:0:  10%|▌     | 6492/66745 [00:11<01:44, 577.13batc

Training batches on cuda:0:  17%|▊    | 11537/66745 [00:20<01:38, 560.24batch/s][A
Training batches on cuda:0:  17%|▊    | 11594/66745 [00:20<01:39, 556.50batch/s][A
Training batches on cuda:0:  17%|▊    | 11654/66745 [00:20<01:36, 568.59batch/s][A
Training batches on cuda:0:  18%|▉    | 11711/66745 [00:20<01:37, 564.89batch/s][A
Training batches on cuda:0:  18%|▉    | 11768/66745 [00:20<01:37, 565.94batch/s][A
Training batches on cuda:0:  18%|▉    | 11825/66745 [00:20<01:38, 557.36batch/s][A
Training batches on cuda:0:  18%|▉    | 11882/66745 [00:20<01:37, 560.51batch/s][A
Training batches on cuda:0:  18%|▉    | 11939/66745 [00:20<01:37, 560.50batch/s][A
Training batches on cuda:0:  18%|▉    | 11996/66745 [00:20<01:39, 549.72batch/s][A
Training batches on cuda:0:  18%|▉    | 12052/66745 [00:21<01:40, 544.88batch/s][A
Training batches on cuda:0:  18%|▉    | 12110/66745 [00:21<01:38, 554.74batch/s][A
Training batches on cuda:0:  18%|▉    | 12169/66745 [00:21<01:37, 562.63batc

Training batches on cuda:0:  26%|█▎   | 17254/66745 [00:30<01:25, 581.00batch/s][A
Training batches on cuda:0:  26%|█▎   | 17313/66745 [00:30<01:25, 581.22batch/s][A
Training batches on cuda:0:  26%|█▎   | 17372/66745 [00:30<01:25, 577.47batch/s][A
Training batches on cuda:0:  26%|█▎   | 17431/66745 [00:30<01:25, 579.96batch/s][A
Training batches on cuda:0:  26%|█▎   | 17490/66745 [00:30<01:25, 578.20batch/s][A
Training batches on cuda:0:  26%|█▎   | 17548/66745 [00:30<01:26, 568.89batch/s][A
Training batches on cuda:0:  26%|█▎   | 17605/66745 [00:30<01:26, 568.19batch/s][A
Training batches on cuda:0:  26%|█▎   | 17662/66745 [00:30<01:26, 568.62batch/s][A
Training batches on cuda:0:  27%|█▎   | 17721/66745 [00:30<01:25, 574.11batch/s][A
Training batches on cuda:0:  27%|█▎   | 17780/66745 [00:30<01:24, 578.45batch/s][A
Training batches on cuda:0:  27%|█▎   | 17839/66745 [00:31<01:24, 580.86batch/s][A
Training batches on cuda:0:  27%|█▎   | 17898/66745 [00:31<01:23, 582.32batc

Training batches on cuda:0:  34%|█▋   | 22984/66745 [00:39<01:14, 585.96batch/s][A
Training batches on cuda:0:  35%|█▋   | 23043/66745 [00:40<01:14, 583.99batch/s][A
Training batches on cuda:0:  35%|█▋   | 23102/66745 [00:40<01:14, 583.97batch/s][A
Training batches on cuda:0:  35%|█▋   | 23161/66745 [00:40<01:15, 580.55batch/s][A
Training batches on cuda:0:  35%|█▋   | 23220/66745 [00:40<01:15, 575.07batch/s][A
Training batches on cuda:0:  35%|█▋   | 23278/66745 [00:40<01:15, 573.53batch/s][A
Training batches on cuda:0:  35%|█▋   | 23336/66745 [00:40<01:16, 569.63batch/s][A
Training batches on cuda:0:  35%|█▊   | 23395/66745 [00:40<01:15, 574.04batch/s][A
Training batches on cuda:0:  35%|█▊   | 23454/66745 [00:40<01:15, 577.21batch/s][A
Training batches on cuda:0:  35%|█▊   | 23513/66745 [00:40<01:14, 579.66batch/s][A
Training batches on cuda:0:  35%|█▊   | 23572/66745 [00:40<01:14, 581.06batch/s][A
Training batches on cuda:0:  35%|█▊   | 23631/66745 [00:41<01:14, 582.48batc

Training batches on cuda:0:  43%|██▏  | 28681/66745 [00:49<01:05, 581.82batch/s][A
Training batches on cuda:0:  43%|██▏  | 28741/66745 [00:49<01:04, 586.00batch/s][A
Training batches on cuda:0:  43%|██▏  | 28800/66745 [00:50<01:04, 585.27batch/s][A
Training batches on cuda:0:  43%|██▏  | 28860/66745 [00:50<01:04, 588.35batch/s][A
Training batches on cuda:0:  43%|██▏  | 28919/66745 [00:50<01:04, 587.43batch/s][A
Training batches on cuda:0:  43%|██▏  | 28978/66745 [00:50<01:04, 587.11batch/s][A
Training batches on cuda:0:  44%|██▏  | 29037/66745 [00:50<01:04, 587.42batch/s][A
Training batches on cuda:0:  44%|██▏  | 29096/66745 [00:50<01:04, 587.16batch/s][A
Training batches on cuda:0:  44%|██▏  | 29155/66745 [00:50<01:04, 586.23batch/s][A
Training batches on cuda:0:  44%|██▏  | 29214/66745 [00:50<01:05, 572.65batch/s][A
Training batches on cuda:0:  44%|██▏  | 29273/66745 [00:50<01:04, 577.67batch/s][A
Training batches on cuda:0:  44%|██▏  | 29333/66745 [00:51<01:04, 581.61batc

Training batches on cuda:0:  52%|██▌  | 34415/66745 [00:59<00:55, 585.72batch/s][A
Training batches on cuda:0:  52%|██▌  | 34474/66745 [00:59<00:55, 585.87batch/s][A
Training batches on cuda:0:  52%|██▌  | 34533/66745 [00:59<00:55, 579.56batch/s][A
Training batches on cuda:0:  52%|██▌  | 34594/66745 [01:00<00:54, 587.12batch/s][A
Training batches on cuda:0:  52%|██▌  | 34653/66745 [01:00<00:54, 587.86batch/s][A
Training batches on cuda:0:  52%|██▌  | 34712/66745 [01:00<00:54, 587.20batch/s][A
Training batches on cuda:0:  52%|██▌  | 34771/66745 [01:00<00:54, 587.72batch/s][A
Training batches on cuda:0:  52%|██▌  | 34830/66745 [01:00<00:54, 587.70batch/s][A
Training batches on cuda:0:  52%|██▌  | 34889/66745 [01:00<00:54, 587.07batch/s][A
Training batches on cuda:0:  52%|██▌  | 34948/66745 [01:00<00:54, 586.71batch/s][A
Training batches on cuda:0:  52%|██▌  | 35007/66745 [01:00<00:54, 586.48batch/s][A
Training batches on cuda:0:  53%|██▋  | 35066/66745 [01:00<00:54, 585.77batc

Training batches on cuda:0:  60%|███  | 40139/66745 [01:09<00:45, 581.12batch/s][A
Training batches on cuda:0:  60%|███  | 40198/66745 [01:09<00:45, 582.38batch/s][A
Training batches on cuda:0:  60%|███  | 40257/66745 [01:09<00:45, 579.81batch/s][A
Training batches on cuda:0:  60%|███  | 40316/66745 [01:09<00:45, 580.96batch/s][A
Training batches on cuda:0:  60%|███  | 40375/66745 [01:10<00:45, 575.62batch/s][A
Training batches on cuda:0:  61%|███  | 40433/66745 [01:10<00:45, 576.07batch/s][A
Training batches on cuda:0:  61%|███  | 40492/66745 [01:10<00:45, 578.08batch/s][A
Training batches on cuda:0:  61%|███  | 40551/66745 [01:10<00:45, 579.63batch/s][A
Training batches on cuda:0:  61%|███  | 40610/66745 [01:10<00:45, 580.77batch/s][A
Training batches on cuda:0:  61%|███  | 40669/66745 [01:10<00:44, 581.57batch/s][A
Training batches on cuda:0:  61%|███  | 40728/66745 [01:10<00:44, 582.31batch/s][A
Training batches on cuda:0:  61%|███  | 40787/66745 [01:10<00:44, 582.72batc

Training batches on cuda:0:  69%|███▍ | 45817/66745 [01:19<00:38, 537.50batch/s][A
Training batches on cuda:0:  69%|███▍ | 45871/66745 [01:19<00:39, 534.85batch/s][A
Training batches on cuda:0:  69%|███▍ | 45925/66745 [01:19<00:39, 532.82batch/s][A
Training batches on cuda:0:  69%|███▍ | 45980/66745 [01:19<00:38, 535.40batch/s][A
Training batches on cuda:0:  69%|███▍ | 46039/66745 [01:20<00:37, 549.70batch/s][A
Training batches on cuda:0:  69%|███▍ | 46095/66745 [01:20<00:37, 550.21batch/s][A
Training batches on cuda:0:  69%|███▍ | 46152/66745 [01:20<00:37, 554.03batch/s][A
Training batches on cuda:0:  69%|███▍ | 46208/66745 [01:20<00:37, 554.69batch/s][A
Training batches on cuda:0:  69%|███▍ | 46264/66745 [01:20<00:37, 551.40batch/s][A
Training batches on cuda:0:  69%|███▍ | 46323/66745 [01:20<00:36, 561.34batch/s][A
Training batches on cuda:0:  69%|███▍ | 46380/66745 [01:20<00:36, 561.06batch/s][A
Training batches on cuda:0:  70%|███▍ | 46438/66745 [01:20<00:35, 565.04batc

Training batches on cuda:0:  77%|███▊ | 51521/66745 [01:29<00:26, 570.04batch/s][A
Training batches on cuda:0:  77%|███▊ | 51580/66745 [01:29<00:26, 574.37batch/s][A
Training batches on cuda:0:  77%|███▊ | 51639/66745 [01:29<00:26, 577.71batch/s][A
Training batches on cuda:0:  77%|███▊ | 51698/66745 [01:29<00:25, 580.15batch/s][A
Training batches on cuda:0:  78%|███▉ | 51757/66745 [01:29<00:25, 582.07batch/s][A
Training batches on cuda:0:  78%|███▉ | 51818/66745 [01:29<00:25, 589.03batch/s][A
Training batches on cuda:0:  78%|███▉ | 51878/66745 [01:30<00:25, 590.85batch/s][A
Training batches on cuda:0:  78%|███▉ | 51939/66745 [01:30<00:24, 596.25batch/s][A
Training batches on cuda:0:  78%|███▉ | 51999/66745 [01:30<00:24, 596.47batch/s][A
Training batches on cuda:0:  78%|███▉ | 52060/66745 [01:30<00:24, 597.71batch/s][A
Training batches on cuda:0:  78%|███▉ | 52121/66745 [01:30<00:24, 599.26batch/s][A
Training batches on cuda:0:  78%|███▉ | 52181/66745 [01:30<00:25, 581.70batc

Training batches on cuda:0:  86%|████▎| 57285/66745 [01:39<00:16, 584.91batch/s][A
Training batches on cuda:0:  86%|████▎| 57344/66745 [01:39<00:16, 584.79batch/s][A
Training batches on cuda:0:  86%|████▎| 57403/66745 [01:39<00:16, 576.16batch/s][A
Training batches on cuda:0:  86%|████▎| 57461/66745 [01:39<00:16, 576.71batch/s][A
Training batches on cuda:0:  86%|████▎| 57522/66745 [01:39<00:15, 586.18batch/s][A
Training batches on cuda:0:  86%|████▎| 57581/66745 [01:39<00:15, 585.35batch/s][A
Training batches on cuda:0:  86%|████▎| 57640/66745 [01:39<00:15, 584.19batch/s][A
Training batches on cuda:0:  86%|████▎| 57699/66745 [01:40<00:15, 583.54batch/s][A
Training batches on cuda:0:  87%|████▎| 57758/66745 [01:40<00:15, 575.37batch/s][A
Training batches on cuda:0:  87%|████▎| 57817/66745 [01:40<00:15, 579.67batch/s][A
Training batches on cuda:0:  87%|████▎| 57876/66745 [01:40<00:15, 582.58batch/s][A
Training batches on cuda:0:  87%|████▎| 57936/66745 [01:40<00:15, 585.35batc

Training batches on cuda:0:  94%|████▋| 62997/66745 [01:49<00:06, 584.23batch/s][A
Training batches on cuda:0:  94%|████▋| 63056/66745 [01:49<00:06, 585.69batch/s][A
Training batches on cuda:0:  95%|████▋| 63115/66745 [01:49<00:06, 586.41batch/s][A
Training batches on cuda:0:  95%|████▋| 63174/66745 [01:49<00:06, 584.11batch/s][A
Training batches on cuda:0:  95%|████▋| 63233/66745 [01:49<00:06, 583.78batch/s][A
Training batches on cuda:0:  95%|████▋| 63292/66745 [01:49<00:05, 583.66batch/s][A
Training batches on cuda:0:  95%|████▋| 63351/66745 [01:49<00:05, 583.16batch/s][A
Training batches on cuda:0:  95%|████▊| 63410/66745 [01:49<00:05, 583.39batch/s][A
Training batches on cuda:0:  95%|████▊| 63469/66745 [01:50<00:05, 583.90batch/s][A
Training batches on cuda:0:  95%|████▊| 63528/66745 [01:50<00:05, 584.07batch/s][A
Training batches on cuda:0:  95%|████▊| 63587/66745 [01:50<00:05, 584.11batch/s][A
Training batches on cuda:0:  95%|████▊| 63646/66745 [01:50<00:05, 584.26batc

Training batches on cuda:0:   3%|▏     | 1795/66745 [00:03<01:53, 572.15batch/s][A
Training batches on cuda:0:   3%|▏     | 1853/66745 [00:03<01:54, 567.64batch/s][A
Training batches on cuda:0:   3%|▏     | 1910/66745 [00:03<01:54, 568.11batch/s][A
Training batches on cuda:0:   3%|▏     | 1967/66745 [00:03<01:54, 566.28batch/s][A
Training batches on cuda:0:   3%|▏     | 2026/66745 [00:03<01:53, 572.00batch/s][A
Training batches on cuda:0:   3%|▏     | 2084/66745 [00:03<01:52, 573.47batch/s][A
Training batches on cuda:0:   3%|▏     | 2142/66745 [00:03<01:52, 573.38batch/s][A
Training batches on cuda:0:   3%|▏     | 2201/66745 [00:03<01:51, 576.94batch/s][A
Training batches on cuda:0:   3%|▏     | 2260/66745 [00:04<01:51, 578.99batch/s][A
Training batches on cuda:0:   3%|▏     | 2318/66745 [00:04<01:52, 570.52batch/s][A
Training batches on cuda:0:   4%|▏     | 2376/66745 [00:04<01:54, 564.23batch/s][A
Training batches on cuda:0:   4%|▏     | 2434/66745 [00:04<01:53, 568.12batc

Training batches on cuda:0:  11%|▋     | 7391/66745 [00:13<01:44, 569.25batch/s][A
Training batches on cuda:0:  11%|▋     | 7448/66745 [00:13<01:44, 566.17batch/s][A
Training batches on cuda:0:  11%|▋     | 7507/66745 [00:13<01:43, 571.67batch/s][A
Training batches on cuda:0:  11%|▋     | 7566/66745 [00:13<01:42, 575.79batch/s][A
Training batches on cuda:0:  11%|▋     | 7625/66745 [00:13<01:42, 579.06batch/s][A
Training batches on cuda:0:  12%|▋     | 7684/66745 [00:13<01:41, 581.07batch/s][A
Training batches on cuda:0:  12%|▋     | 7743/66745 [00:13<01:41, 582.47batch/s][A
Training batches on cuda:0:  12%|▋     | 7802/66745 [00:13<01:41, 582.60batch/s][A
Training batches on cuda:0:  12%|▋     | 7861/66745 [00:14<01:41, 580.34batch/s][A
Training batches on cuda:0:  12%|▋     | 7920/66745 [00:14<01:41, 581.50batch/s][A
Training batches on cuda:0:  12%|▋     | 7979/66745 [00:14<01:40, 582.21batch/s][A
Training batches on cuda:0:  12%|▋     | 8038/66745 [00:14<01:42, 570.11batc

Training batches on cuda:0:  20%|▉    | 13116/66745 [00:23<01:31, 587.00batch/s][A
Training batches on cuda:0:  20%|▉    | 13175/66745 [00:23<01:31, 586.30batch/s][A
Training batches on cuda:0:  20%|▉    | 13235/66745 [00:23<01:31, 587.66batch/s][A
Training batches on cuda:0:  20%|▉    | 13295/66745 [00:23<01:30, 588.65batch/s][A
Training batches on cuda:0:  20%|█    | 13354/66745 [00:23<01:31, 581.35batch/s][A
Training batches on cuda:0:  20%|█    | 13414/66745 [00:23<01:31, 584.23batch/s][A
Training batches on cuda:0:  20%|█    | 13475/66745 [00:23<01:30, 588.97batch/s][A
Training batches on cuda:0:  20%|█    | 13535/66745 [00:23<01:30, 590.07batch/s][A
Training batches on cuda:0:  20%|█    | 13595/66745 [00:23<01:29, 590.64batch/s][A
Training batches on cuda:0:  20%|█    | 13656/66745 [00:23<01:29, 593.95batch/s][A
Training batches on cuda:0:  21%|█    | 13716/66745 [00:24<01:29, 591.50batch/s][A
Training batches on cuda:0:  21%|█    | 13776/66745 [00:24<01:29, 589.29batc

Training batches on cuda:0:  28%|█▍   | 18884/66745 [00:32<01:24, 564.64batch/s][A
Training batches on cuda:0:  28%|█▍   | 18941/66745 [00:33<01:25, 559.64batch/s][A
Training batches on cuda:0:  28%|█▍   | 19000/66745 [00:33<01:24, 566.15batch/s][A
Training batches on cuda:0:  29%|█▍   | 19059/66745 [00:33<01:23, 570.50batch/s][A
Training batches on cuda:0:  29%|█▍   | 19118/66745 [00:33<01:23, 573.48batch/s][A
Training batches on cuda:0:  29%|█▍   | 19177/66745 [00:33<01:22, 576.03batch/s][A
Training batches on cuda:0:  29%|█▍   | 19236/66745 [00:33<01:22, 577.67batch/s][A
Training batches on cuda:0:  29%|█▍   | 19295/66745 [00:33<01:21, 579.02batch/s][A
Training batches on cuda:0:  29%|█▍   | 19354/66745 [00:33<01:21, 579.97batch/s][A
Training batches on cuda:0:  29%|█▍   | 19413/66745 [00:33<01:21, 580.61batch/s][A
Training batches on cuda:0:  29%|█▍   | 19472/66745 [00:34<01:21, 580.94batch/s][A
Training batches on cuda:0:  29%|█▍   | 19531/66745 [00:34<01:21, 581.47batc

Training batches on cuda:0:  37%|█▊   | 24609/66745 [00:42<01:11, 587.25batch/s][A
Training batches on cuda:0:  37%|█▊   | 24668/66745 [00:42<01:12, 583.90batch/s][A
Training batches on cuda:0:  37%|█▊   | 24728/66745 [00:43<01:11, 585.93batch/s][A
Training batches on cuda:0:  37%|█▊   | 24788/66745 [00:43<01:11, 587.46batch/s][A
Training batches on cuda:0:  37%|█▊   | 24848/66745 [00:43<01:11, 588.36batch/s][A
Training batches on cuda:0:  37%|█▊   | 24907/66745 [00:43<01:11, 583.95batch/s][A
Training batches on cuda:0:  37%|█▊   | 24967/66745 [00:43<01:11, 585.84batch/s][A
Training batches on cuda:0:  37%|█▊   | 25026/66745 [00:43<01:11, 582.24batch/s][A
Training batches on cuda:0:  38%|█▉   | 25086/66745 [00:43<01:11, 585.18batch/s][A
Training batches on cuda:0:  38%|█▉   | 25145/66745 [00:43<01:10, 586.06batch/s][A
Training batches on cuda:0:  38%|█▉   | 25205/66745 [00:43<01:10, 587.49batch/s][A
Training batches on cuda:0:  38%|█▉   | 25264/66745 [00:43<01:11, 584.13batc

Training batches on cuda:0:  46%|██▎  | 30372/66745 [00:52<01:01, 592.87batch/s][A
Training batches on cuda:0:  46%|██▎  | 30433/66745 [00:52<01:00, 595.38batch/s][A
Training batches on cuda:0:  46%|██▎  | 30493/66745 [00:52<01:01, 585.47batch/s][A
Training batches on cuda:0:  46%|██▎  | 30552/66745 [00:53<01:01, 585.93batch/s][A
Training batches on cuda:0:  46%|██▎  | 30612/66745 [00:53<01:01, 588.16batch/s][A
Training batches on cuda:0:  46%|██▎  | 30672/66745 [00:53<01:01, 590.03batch/s][A
Training batches on cuda:0:  46%|██▎  | 30732/66745 [00:53<01:00, 591.29batch/s][A
Training batches on cuda:0:  46%|██▎  | 30794/66745 [00:53<01:00, 597.36batch/s][A
Training batches on cuda:0:  46%|██▎  | 30854/66745 [00:53<01:00, 598.06batch/s][A
Training batches on cuda:0:  46%|██▎  | 30914/66745 [00:53<01:00, 594.96batch/s][A
Training batches on cuda:0:  46%|██▎  | 30974/66745 [00:53<01:00, 591.26batch/s][A
Training batches on cuda:0:  46%|██▎  | 31034/66745 [00:53<01:00, 590.69batc

Training batches on cuda:0:  54%|██▋  | 36159/66745 [01:02<00:52, 583.91batch/s][A
Training batches on cuda:0:  54%|██▋  | 36218/66745 [01:02<00:53, 573.71batch/s][A
Training batches on cuda:0:  54%|██▋  | 36277/66745 [01:02<00:52, 578.30batch/s][A
Training batches on cuda:0:  54%|██▋  | 36336/66745 [01:02<00:52, 579.98batch/s][A
Training batches on cuda:0:  55%|██▋  | 36396/66745 [01:03<00:52, 583.00batch/s][A
Training batches on cuda:0:  55%|██▋  | 36455/66745 [01:03<00:52, 576.44batch/s][A
Training batches on cuda:0:  55%|██▋  | 36514/66745 [01:03<00:52, 580.35batch/s][A
Training batches on cuda:0:  55%|██▋  | 36573/66745 [01:03<00:51, 583.16batch/s][A
Training batches on cuda:0:  55%|██▋  | 36632/66745 [01:03<00:51, 585.12batch/s][A
Training batches on cuda:0:  55%|██▋  | 36691/66745 [01:03<00:51, 583.36batch/s][A
Training batches on cuda:0:  55%|██▊  | 36751/66745 [01:03<00:51, 585.44batch/s][A
Training batches on cuda:0:  55%|██▊  | 36810/66745 [01:03<00:51, 584.89batc

Training batches on cuda:0:  63%|███▏ | 41901/66745 [01:12<00:41, 593.94batch/s][A
Training batches on cuda:0:  63%|███▏ | 41962/66745 [01:12<00:41, 597.80batch/s][A
Training batches on cuda:0:  63%|███▏ | 42022/66745 [01:12<00:41, 596.90batch/s][A
Training batches on cuda:0:  63%|███▏ | 42084/66745 [01:12<00:41, 601.37batch/s][A
Training batches on cuda:0:  63%|███▏ | 42145/66745 [01:12<00:41, 595.60batch/s][A
Training batches on cuda:0:  63%|███▏ | 42205/66745 [01:13<00:42, 577.69batch/s][A
Training batches on cuda:0:  63%|███▏ | 42264/66745 [01:13<00:42, 579.17batch/s][A
Training batches on cuda:0:  63%|███▏ | 42323/66745 [01:13<00:42, 580.32batch/s][A
Training batches on cuda:0:  63%|███▏ | 42382/66745 [01:13<00:41, 582.86batch/s][A
Training batches on cuda:0:  64%|███▏ | 42441/66745 [01:13<00:41, 584.05batch/s][A
Training batches on cuda:0:  64%|███▏ | 42501/66745 [01:13<00:41, 586.98batch/s][A
Training batches on cuda:0:  64%|███▏ | 42560/66745 [01:13<00:41, 587.21batc

Training batches on cuda:0:  71%|███▌ | 47690/66745 [01:22<00:33, 567.65batch/s][A
Training batches on cuda:0:  72%|███▌ | 47749/66745 [01:22<00:33, 572.17batch/s][A
Training batches on cuda:0:  72%|███▌ | 47809/66745 [01:22<00:32, 578.72batch/s][A
Training batches on cuda:0:  72%|███▌ | 47869/66745 [01:22<00:32, 583.18batch/s][A
Training batches on cuda:0:  72%|███▌ | 47928/66745 [01:22<00:32, 584.98batch/s][A
Training batches on cuda:0:  72%|███▌ | 47988/66745 [01:22<00:31, 587.14batch/s][A
Training batches on cuda:0:  72%|███▌ | 48047/66745 [01:22<00:31, 586.24batch/s][A
Training batches on cuda:0:  72%|███▌ | 48106/66745 [01:23<00:31, 585.97batch/s][A
Training batches on cuda:0:  72%|███▌ | 48165/66745 [01:23<00:32, 576.35batch/s][A
Training batches on cuda:0:  72%|███▌ | 48224/66745 [01:23<00:31, 579.51batch/s][A
Training batches on cuda:0:  72%|███▌ | 48282/66745 [01:23<00:31, 578.15batch/s][A
Training batches on cuda:0:  72%|███▌ | 48340/66745 [01:23<00:31, 577.47batc

Training batches on cuda:0:  80%|████ | 53428/66745 [01:32<00:22, 587.48batch/s][A
Training batches on cuda:0:  80%|████ | 53487/66745 [01:32<00:22, 581.72batch/s][A
Training batches on cuda:0:  80%|████ | 53546/66745 [01:32<00:23, 570.83batch/s][A
Training batches on cuda:0:  80%|████ | 53604/66745 [01:32<00:23, 564.17batch/s][A
Training batches on cuda:0:  80%|████ | 53662/66745 [01:32<00:23, 567.15batch/s][A
Training batches on cuda:0:  80%|████ | 53720/66745 [01:32<00:22, 569.30batch/s][A
Training batches on cuda:0:  81%|████ | 53778/66745 [01:32<00:22, 570.08batch/s][A
Training batches on cuda:0:  81%|████ | 53836/66745 [01:32<00:22, 562.15batch/s][A
Training batches on cuda:0:  81%|████ | 53893/66745 [01:33<00:23, 550.33batch/s][A
Training batches on cuda:0:  81%|████ | 53949/66745 [01:33<00:23, 545.45batch/s][A
Training batches on cuda:0:  81%|████ | 54004/66745 [01:33<00:23, 542.80batch/s][A
Training batches on cuda:0:  81%|████ | 54059/66745 [01:33<00:23, 543.26batc

Training batches on cuda:0:  89%|████▍| 59101/66745 [01:42<00:13, 583.01batch/s][A
Training batches on cuda:0:  89%|████▍| 59160/66745 [01:42<00:12, 583.83batch/s][A
Training batches on cuda:0:  89%|████▍| 59219/66745 [01:42<00:13, 576.91batch/s][A
Training batches on cuda:0:  89%|████▍| 59277/66745 [01:42<00:13, 569.29batch/s][A
Training batches on cuda:0:  89%|████▍| 59336/66745 [01:42<00:12, 575.15batch/s][A
Training batches on cuda:0:  89%|████▍| 59396/66745 [01:42<00:12, 579.65batch/s][A
Training batches on cuda:0:  89%|████▍| 59455/66745 [01:42<00:12, 582.55batch/s][A
Training batches on cuda:0:  89%|████▍| 59514/66745 [01:42<00:12, 582.97batch/s][A
Training batches on cuda:0:  89%|████▍| 59573/66745 [01:42<00:12, 582.72batch/s][A
Training batches on cuda:0:  89%|████▍| 59633/66745 [01:43<00:12, 585.11batch/s][A
Training batches on cuda:0:  89%|████▍| 59693/66745 [01:43<00:12, 586.85batch/s][A
Training batches on cuda:0:  90%|████▍| 59752/66745 [01:43<00:11, 587.77batc

Training batches on cuda:0:  97%|████▊| 64839/66745 [01:51<00:03, 586.43batch/s][A
Training batches on cuda:0:  97%|████▊| 64899/66745 [01:52<00:03, 587.60batch/s][A
Training batches on cuda:0:  97%|████▊| 64959/66745 [01:52<00:03, 589.08batch/s][A
Training batches on cuda:0:  97%|████▊| 65019/66745 [01:52<00:02, 589.44batch/s][A
Training batches on cuda:0:  98%|████▉| 65078/66745 [01:52<00:02, 589.12batch/s][A
Training batches on cuda:0:  98%|████▉| 65137/66745 [01:52<00:02, 589.19batch/s][A
Training batches on cuda:0:  98%|████▉| 65196/66745 [01:52<00:02, 589.36batch/s][A
Training batches on cuda:0:  98%|████▉| 65255/66745 [01:52<00:02, 589.36batch/s][A
Training batches on cuda:0:  98%|████▉| 65315/66745 [01:52<00:02, 590.03batch/s][A
Training batches on cuda:0:  98%|████▉| 65375/66745 [01:52<00:02, 589.68batch/s][A
Training batches on cuda:0:  98%|████▉| 65434/66745 [01:52<00:02, 583.82batch/s][A
Training batches on cuda:0:  98%|████▉| 65493/66745 [01:53<00:02, 585.01batc

Training batches on cuda:0:   5%|▎     | 3627/66745 [00:06<01:51, 564.02batch/s][A
Training batches on cuda:0:   6%|▎     | 3686/66745 [00:06<01:50, 570.39batch/s][A
Training batches on cuda:0:   6%|▎     | 3745/66745 [00:06<01:49, 574.98batch/s][A
Training batches on cuda:0:   6%|▎     | 3803/66745 [00:06<01:51, 566.82batch/s][A
Training batches on cuda:0:   6%|▎     | 3860/66745 [00:06<01:52, 558.18batch/s][A
Training batches on cuda:0:   6%|▎     | 3916/66745 [00:06<01:53, 551.78batch/s][A
Training batches on cuda:0:   6%|▎     | 3974/66745 [00:07<01:52, 559.44batch/s][A
Training batches on cuda:0:   6%|▎     | 4034/66745 [00:07<01:50, 569.79batch/s][A
Training batches on cuda:0:   6%|▎     | 4092/66745 [00:07<01:49, 570.45batch/s][A
Training batches on cuda:0:   6%|▎     | 4150/66745 [00:07<01:49, 571.99batch/s][A
Training batches on cuda:0:   6%|▍     | 4208/66745 [00:07<01:49, 569.61batch/s][A
Training batches on cuda:0:   6%|▍     | 4267/66745 [00:07<01:48, 574.58batc

Training batches on cuda:0:  14%|▊     | 9273/66745 [00:16<01:42, 562.70batch/s][A
Training batches on cuda:0:  14%|▊     | 9333/66745 [00:16<01:40, 573.59batch/s][A
Training batches on cuda:0:  14%|▊     | 9392/66745 [00:16<01:39, 575.57batch/s][A
Training batches on cuda:0:  14%|▊     | 9452/66745 [00:16<01:38, 580.46batch/s][A
Training batches on cuda:0:  14%|▊     | 9511/66745 [00:16<01:38, 582.91batch/s][A
Training batches on cuda:0:  14%|▊     | 9571/66745 [00:16<01:37, 585.31batch/s][A
Training batches on cuda:0:  14%|▊     | 9630/66745 [00:16<01:37, 582.86batch/s][A
Training batches on cuda:0:  15%|▊     | 9689/66745 [00:17<01:39, 574.83batch/s][A
Training batches on cuda:0:  15%|▉     | 9748/66745 [00:17<01:38, 577.40batch/s][A
Training batches on cuda:0:  15%|▉     | 9806/66745 [00:17<01:40, 568.79batch/s][A
Training batches on cuda:0:  15%|▉     | 9863/66745 [00:17<01:41, 559.10batch/s][A
Training batches on cuda:0:  15%|▉     | 9922/66745 [00:17<01:40, 567.15batc

Training batches on cuda:0:  22%|█    | 14957/66745 [00:26<01:30, 574.45batch/s][A
Training batches on cuda:0:  22%|█    | 15015/66745 [00:26<01:31, 568.19batch/s][A
Training batches on cuda:0:  23%|█▏   | 15072/66745 [00:26<01:32, 560.18batch/s][A
Training batches on cuda:0:  23%|█▏   | 15129/66745 [00:26<01:32, 560.88batch/s][A
Training batches on cuda:0:  23%|█▏   | 15186/66745 [00:26<01:31, 562.56batch/s][A
Training batches on cuda:0:  23%|█▏   | 15243/66745 [00:26<01:31, 563.64batch/s][A
Training batches on cuda:0:  23%|█▏   | 15304/66745 [00:26<01:29, 575.28batch/s][A
Training batches on cuda:0:  23%|█▏   | 15363/66745 [00:26<01:28, 578.27batch/s][A
Training batches on cuda:0:  23%|█▏   | 15422/66745 [00:27<01:28, 580.01batch/s][A
Training batches on cuda:0:  23%|█▏   | 15481/66745 [00:27<01:27, 582.70batch/s][A
Training batches on cuda:0:  23%|█▏   | 15540/66745 [00:27<01:27, 584.62batch/s][A
Training batches on cuda:0:  23%|█▏   | 15600/66745 [00:27<01:26, 588.43batc

Training batches on cuda:0:  31%|█▌   | 20632/66745 [00:36<01:21, 567.52batch/s][A
Training batches on cuda:0:  31%|█▌   | 20689/66745 [00:36<01:22, 556.30batch/s][A
Training batches on cuda:0:  31%|█▌   | 20747/66745 [00:36<01:21, 561.26batch/s][A
Training batches on cuda:0:  31%|█▌   | 20806/66745 [00:36<01:20, 568.27batch/s][A
Training batches on cuda:0:  31%|█▌   | 20865/66745 [00:36<01:20, 572.13batch/s][A
Training batches on cuda:0:  31%|█▌   | 20923/66745 [00:36<01:20, 570.33batch/s][A
Training batches on cuda:0:  31%|█▌   | 20981/66745 [00:36<01:21, 563.29batch/s][A
Training batches on cuda:0:  32%|█▌   | 21041/66745 [00:36<01:19, 571.69batch/s][A
Training batches on cuda:0:  32%|█▌   | 21102/66745 [00:37<01:18, 581.30batch/s][A
Training batches on cuda:0:  32%|█▌   | 21163/66745 [00:37<01:17, 588.29batch/s][A
Training batches on cuda:0:  32%|█▌   | 21222/66745 [00:37<01:18, 583.10batch/s][A
Training batches on cuda:0:  32%|█▌   | 21281/66745 [00:37<01:18, 576.01batc

Training batches on cuda:0:  39%|█▉   | 26305/66745 [00:46<01:10, 573.05batch/s][A
Training batches on cuda:0:  39%|█▉   | 26363/66745 [00:46<01:10, 574.83batch/s][A
Training batches on cuda:0:  40%|█▉   | 26422/66745 [00:46<01:09, 576.95batch/s][A
Training batches on cuda:0:  40%|█▉   | 26481/66745 [00:46<01:09, 578.38batch/s][A
Training batches on cuda:0:  40%|█▉   | 26539/66745 [00:46<01:09, 575.21batch/s][A
Training batches on cuda:0:  40%|█▉   | 26598/66745 [00:46<01:09, 578.90batch/s][A
Training batches on cuda:0:  40%|█▉   | 26656/66745 [00:46<01:09, 574.86batch/s][A
Training batches on cuda:0:  40%|██   | 26715/66745 [00:46<01:09, 576.63batch/s][A
Training batches on cuda:0:  40%|██   | 26773/66745 [00:46<01:10, 565.84batch/s][A
Training batches on cuda:0:  40%|██   | 26830/66745 [00:47<01:10, 565.40batch/s][A
Training batches on cuda:0:  40%|██   | 26888/66745 [00:47<01:09, 569.45batch/s][A
Training batches on cuda:0:  40%|██   | 26945/66745 [00:47<01:10, 564.69batc

Training batches on cuda:0:  48%|██▍  | 31931/66745 [00:56<01:03, 548.22batch/s][A
Training batches on cuda:0:  48%|██▍  | 31990/66745 [00:56<01:02, 558.24batch/s][A
Training batches on cuda:0:  48%|██▍  | 32047/66745 [00:56<01:01, 560.66batch/s][A
Training batches on cuda:0:  48%|██▍  | 32106/66745 [00:56<01:01, 566.75batch/s][A
Training batches on cuda:0:  48%|██▍  | 32167/66745 [00:56<00:59, 578.24batch/s][A
Training batches on cuda:0:  48%|██▍  | 32227/66745 [00:56<00:59, 584.27batch/s][A
Training batches on cuda:0:  48%|██▍  | 32288/66745 [00:56<00:58, 590.31batch/s][A
Training batches on cuda:0:  48%|██▍  | 32348/66745 [00:56<00:58, 588.52batch/s][A
Training batches on cuda:0:  49%|██▍  | 32407/66745 [00:56<00:59, 574.88batch/s][A
Training batches on cuda:0:  49%|██▍  | 32465/66745 [00:56<01:00, 563.87batch/s][A
Training batches on cuda:0:  49%|██▍  | 32525/66745 [00:57<00:59, 572.37batch/s][A
Training batches on cuda:0:  49%|██▍  | 32586/66745 [00:57<00:58, 582.64batc

Training batches on cuda:0:  56%|██▊  | 37615/66745 [01:06<00:51, 560.80batch/s][A
Training batches on cuda:0:  56%|██▊  | 37674/66745 [01:06<00:51, 569.15batch/s][A
Training batches on cuda:0:  57%|██▊  | 37733/66745 [01:06<00:50, 573.54batch/s][A
Training batches on cuda:0:  57%|██▊  | 37792/66745 [01:06<00:50, 576.60batch/s][A
Training batches on cuda:0:  57%|██▊  | 37851/66745 [01:06<00:49, 580.25batch/s][A
Training batches on cuda:0:  57%|██▊  | 37911/66745 [01:06<00:49, 583.28batch/s][A
Training batches on cuda:0:  57%|██▊  | 37970/66745 [01:06<00:49, 585.17batch/s][A
Training batches on cuda:0:  57%|██▊  | 38030/66745 [01:06<00:48, 586.82batch/s][A
Training batches on cuda:0:  57%|██▊  | 38089/66745 [01:06<00:48, 587.56batch/s][A
Training batches on cuda:0:  57%|██▊  | 38148/66745 [01:06<00:49, 582.65batch/s][A
Training batches on cuda:0:  57%|██▊  | 38207/66745 [01:07<00:48, 582.93batch/s][A
Training batches on cuda:0:  57%|██▊  | 38266/66745 [01:07<00:48, 582.81batc

Training batches on cuda:0:  65%|███▏ | 43331/66745 [01:15<00:41, 568.62batch/s][A
Training batches on cuda:0:  65%|███▎ | 43390/66745 [01:15<00:40, 574.85batch/s][A
Training batches on cuda:0:  65%|███▎ | 43448/66745 [01:16<00:41, 567.72batch/s][A
Training batches on cuda:0:  65%|███▎ | 43505/66745 [01:16<00:40, 567.86batch/s][A
Training batches on cuda:0:  65%|███▎ | 43562/66745 [01:16<00:41, 565.21batch/s][A
Training batches on cuda:0:  65%|███▎ | 43620/66745 [01:16<00:40, 567.52batch/s][A
Training batches on cuda:0:  65%|███▎ | 43677/66745 [01:16<00:40, 564.19batch/s][A
Training batches on cuda:0:  66%|███▎ | 43736/66745 [01:16<00:40, 570.30batch/s][A
Training batches on cuda:0:  66%|███▎ | 43795/66745 [01:16<00:39, 574.61batch/s][A
Training batches on cuda:0:  66%|███▎ | 43854/66745 [01:16<00:39, 577.57batch/s][A
Training batches on cuda:0:  66%|███▎ | 43913/66745 [01:16<00:39, 579.63batch/s][A
Training batches on cuda:0:  66%|███▎ | 43972/66745 [01:17<00:39, 581.16batc

KeyboardInterrupt: 

### Compute scores for given triplets

In [8]:
# compute scores for positive and negative triplets 

batch_size = 512

n = train.size(0) // batch_size
pos_train_preds = []
for i in range(n+1):
    start_idx = i*batch_size
    end_idx = min((i+1)*batch_size, train.size(0))
    edge = train[start_idx:end_idx]
    pos_train_preds += [model_kg.trained_model.model.score_hrt(edge).squeeze().cpu().detach()]
pos_train_pred = torch.cat(pos_train_preds, dim=0)

n = valid.size(0) // batch_size
pos_valid_preds = []
for i in range(n+1):
    start_idx = i*batch_size
    end_idx = min((i+1)*batch_size, valid.size(0))
    edge = valid[start_idx:end_idx]
    pos_valid_preds += [model_kg.trained_model.model.score_hrt(edge).squeeze().cpu().detach()]
pos_valid_pred = torch.cat(pos_valid_preds, dim=0)

n = valid_neg.size(0) // batch_size
neg_valid_preds = []
for i in range(n+1):
    start_idx = i*batch_size
    end_idx = min((i+1)*batch_size, valid_neg.size(0))
    edge = valid_neg[start_idx:end_idx]
    neg_valid_preds += [model_kg.trained_model.model.score_hrt(edge).squeeze().cpu().detach()]
neg_valid_pred = torch.cat(neg_valid_preds, dim=0)

n = test.size(0) // batch_size
pos_test_preds = []
for i in range(n+1):
    start_idx = i*batch_size
    end_idx = min((i+1)*batch_size, test.size(0))
    edge = test[start_idx:end_idx]
    pos_test_preds += [model_kg.trained_model.model.score_hrt(edge).squeeze().cpu().detach()]
pos_test_pred = torch.cat(pos_test_preds, dim=0)

n = test_neg.size(0) // batch_size
neg_test_preds = []
for i in range(n+1):
    start_idx = i*batch_size
    end_idx = min((i+1)*batch_size, test_neg.size(0))
    edge = test_neg[start_idx:end_idx]
    neg_test_preds += [model_kg.trained_model.model.score_hrt(edge).squeeze().cpu().detach()]
neg_test_pred = torch.cat(neg_test_preds, dim=0)

### Evaluate my results

In [9]:
# Evaluate the coputed scores - hits@K

evaluator = Evaluator(name = 'ogbl-ddi')

results = {}
for K in [10, 20, 30]:
    evaluator.K = K
    train_hits = evaluator.eval({
        'y_pred_pos': pos_train_pred,
        'y_pred_neg': neg_valid_pred,
    })[f'hits@{K}']
    valid_hits = evaluator.eval({
        'y_pred_pos': pos_valid_pred,
        'y_pred_neg': neg_valid_pred,
    })[f'hits@{K}']
    test_hits = evaluator.eval({
        'y_pred_pos': pos_test_pred,
        'y_pred_neg': neg_test_pred,
    })[f'hits@{K}']
    
    results[f'Hits@{K}'] = (train_hits, valid_hits, test_hits)
    
    
for hits, result in results.items():
    print(hits)
#     print(result)
    train_hits, valid_hits, test_hits = result
    print(f'Train: {100 * train_hits:.2f}%')
    print(f'Valid: {100 * valid_hits:.2f}%')
    print(f'Test: {100 * test_hits:.2f}%')


Hits@10
Train: 0.01%
Valid: 0.01%
Test: 0.01%
Hits@20
Train: 0.02%
Valid: 0.02%
Test: 0.02%
Hits@30
Train: 0.03%
Valid: 0.03%
Test: 0.03%


In [31]:
model_kg.predict_head('2424', '0')

Leuprolide - decrease_adverse_effects:
      head_id head_label     score  in_training
1776     1776       2597 -4.225868         True
1787     1787       2606 -4.259010         True
1549     1549       2392 -4.277555         True
3626     3626       4261 -4.303725         True
4148     4148        892 -4.309386         True
1917     1917       2723 -4.312338        False
1929     1929       2734 -4.312355         True
1610     1610       2447 -4.334918         True
3955     3955        718 -4.335896        False
1236     1236        211 -4.366917         True


In [32]:
print(model_kg.trained_model.get_metric('hits@1'))
print(model_kg.trained_model.get_metric('hits@5'))
print(model_kg.trained_model.get_metric('hits@10'))

0.0005993003168800426
0.0036557319329682597
0.007348920135741521


### Optimizing parameters

In [9]:
from pykeen.hpo import hpo_pipeline_from_config

config = {
    'optuna': dict(
        n_trials=5,
    ),
    'pipeline': dict(
#         dataset='Nations',
        training = train_tf,
        testing = test_tf,
        validation = valid_tf,
        model='TransE',
        model_kwargs_ranges=dict(
               embedding_dim=dict(type=int, low=50, high=220, q=20),
        ),
        optimizer='Adam',
        optimizer_kwargs=dict(lr=0.01),
        loss='marginranking',
        loss_kwargs=dict(margin=1),
        training_loop='slcwa',
        training_kwargs=dict(num_epochs=20, batch_size=128),
        negative_sampler='basic',
        negative_sampler_kwargs=dict(num_negs_per_pos=1),
        evaluator_kwargs=dict(filtered=True),
        evaluation_kwargs=dict(batch_size=128),
        stopper='early',
        stopper_kwargs=dict(frequency=5, patience=2, relative_delta=0.002),
    )
}

In [12]:
hpo_pipeline_result = hpo_pipeline_from_config(config)

[32m[I 2023-01-30 09:10:42,742][0m A new study created in memory with name: no-name-c005a0f9-7d4f-437f-bc0c-163f72eb63f9[0m
INFO:pykeen.hpo.hpo:Using model: <class 'pykeen.models.unimodal.trans_e.TransE'>
INFO:pykeen.hpo.hpo:Using loss: <class 'pykeen.losses.MarginRankingLoss'>
INFO:pykeen.hpo.hpo:Using optimizer: <class 'torch.optim.adam.Adam'>
INFO:pykeen.hpo.hpo:Using training loop: <class 'pykeen.training.slcwa.SLCWATrainingLoop'>
INFO:pykeen.hpo.hpo:Using negative sampler: <class 'pykeen.sampling.basic_negative_sampler.BasicNegativeSampler'>
INFO:pykeen.hpo.hpo:Using evaluator: <class 'pykeen.evaluation.rank_based_evaluator.RankBasedEvaluator'>
INFO:pykeen.hpo.hpo:Attempting to maximize both.realistic.inverse_harmonic_mean_rank
INFO:pykeen.hpo.hpo:Filter validation triples when testing: True
INFO:pykeen.pipeline.api:Using device: None
INFO:pykeen.stoppers.early_stopping:Inferred checkpoint path for best model weights: /work/.data/pykeen/checkpoints/best-model-weights-788cfe08-4

Training batches on cuda:0:  48%|██████████████▊                | 7956/16687 [00:13<00:14, 583.16batch/s][A
Training batches on cuda:0:  48%|██████████████▉                | 8015/16687 [00:14<00:15, 570.94batch/s][A
Training batches on cuda:0:  48%|██████████████▉                | 8073/16687 [00:14<00:15, 572.35batch/s][A
Training batches on cuda:0:  49%|███████████████                | 8133/16687 [00:14<00:14, 578.05batch/s][A
Training batches on cuda:0:  49%|███████████████▏               | 8191/16687 [00:14<00:14, 573.20batch/s][A
Training batches on cuda:0:  49%|███████████████▎               | 8249/16687 [00:14<00:14, 566.74batch/s][A
Training batches on cuda:0:  50%|███████████████▍               | 8306/16687 [00:14<00:14, 564.44batch/s][A
Training batches on cuda:0:  50%|███████████████▌               | 8365/16687 [00:14<00:14, 571.40batch/s][A
Training batches on cuda:0:  50%|███████████████▋               | 8423/16687 [00:14<00:14, 566.37batch/s][A
Training batches on

Training batches on cuda:0:  99%|█████████████████████████████▋| 16525/16687 [00:29<00:00, 574.50batch/s][A
Training batches on cuda:0:  99%|█████████████████████████████▊| 16583/16687 [00:29<00:00, 567.02batch/s][A
Training batches on cuda:0: 100%|█████████████████████████████▉| 16640/16687 [00:29<00:00, 561.47batch/s][A
Training epochs on cuda:0:   5%|▌           | 1/20 [00:29<09:26, 29.82s/epoch, loss=0.659, prev_loss=nan][A
Training batches on cuda:0:   0%|                                           | 0/16687 [00:00<?, ?batch/s][A
Training batches on cuda:0:   0%|                                 | 18/16687 [00:00<01:33, 177.59batch/s][A
Training batches on cuda:0:   0%|▏                                | 72/16687 [00:00<00:42, 386.62batch/s][A
Training batches on cuda:0:   1%|▎                               | 131/16687 [00:00<00:34, 477.46batch/s][A
Training batches on cuda:0:   1%|▎                               | 191/16687 [00:00<00:31, 523.81batch/s][A
Training batches on

Training batches on cuda:0:  50%|███████████████▌               | 8383/16687 [00:14<00:15, 540.63batch/s][A
Training batches on cuda:0:  51%|███████████████▋               | 8440/16687 [00:14<00:15, 547.16batch/s][A
Training batches on cuda:0:  51%|███████████████▊               | 8495/16687 [00:15<00:15, 537.34batch/s][A
Training batches on cuda:0:  51%|███████████████▉               | 8550/16687 [00:15<00:15, 540.32batch/s][A
Training batches on cuda:0:  52%|███████████████▉               | 8607/16687 [00:15<00:14, 546.34batch/s][A
Training batches on cuda:0:  52%|████████████████               | 8663/16687 [00:15<00:14, 549.51batch/s][A
Training batches on cuda:0:  52%|████████████████▏              | 8721/16687 [00:15<00:14, 557.02batch/s][A
Training batches on cuda:0:  53%|████████████████▎              | 8779/16687 [00:15<00:14, 563.39batch/s][A
Training batches on cuda:0:  53%|████████████████▍              | 8836/16687 [00:15<00:13, 561.53batch/s][A
Training batches on

Training batches on cuda:0:   1%|▍                               | 243/16687 [00:00<00:31, 530.36batch/s][A
Training batches on cuda:0:   2%|▌                               | 301/16687 [00:00<00:30, 545.83batch/s][A
Training batches on cuda:0:   2%|▋                               | 359/16687 [00:00<00:29, 555.65batch/s][A
Training batches on cuda:0:   2%|▊                               | 417/16687 [00:00<00:28, 561.71batch/s][A
Training batches on cuda:0:   3%|▉                               | 475/16687 [00:00<00:28, 566.56batch/s][A
Training batches on cuda:0:   3%|█                               | 533/16687 [00:01<00:28, 569.65batch/s][A
Training batches on cuda:0:   4%|█▏                              | 591/16687 [00:01<00:28, 572.04batch/s][A
Training batches on cuda:0:   4%|█▏                              | 649/16687 [00:01<00:27, 573.32batch/s][A
Training batches on cuda:0:   4%|█▎                              | 707/16687 [00:01<00:27, 573.70batch/s][A
Training batches on

Training batches on cuda:0:  53%|████████████████▍              | 8821/16687 [00:15<00:13, 563.43batch/s][A
Training batches on cuda:0:  53%|████████████████▍              | 8879/16687 [00:15<00:13, 565.86batch/s][A
Training batches on cuda:0:  54%|████████████████▌              | 8936/16687 [00:16<00:13, 564.63batch/s][A
Training batches on cuda:0:  54%|████████████████▋              | 8993/16687 [00:16<00:13, 566.21batch/s][A
Training batches on cuda:0:  54%|████████████████▊              | 9050/16687 [00:16<00:13, 564.00batch/s][A
Training batches on cuda:0:  55%|████████████████▉              | 9107/16687 [00:16<00:13, 565.70batch/s][A
Training batches on cuda:0:  55%|█████████████████              | 9164/16687 [00:16<00:13, 566.59batch/s][A
Training batches on cuda:0:  55%|█████████████████▏             | 9221/16687 [00:16<00:13, 566.71batch/s][A
Training batches on cuda:0:  56%|█████████████████▏             | 9278/16687 [00:16<00:13, 566.66batch/s][A
Training batches on

Training batches on cuda:0:   4%|█▏                              | 625/16687 [00:01<00:28, 559.46batch/s][A
Training batches on cuda:0:   4%|█▎                              | 683/16687 [00:01<00:28, 563.10batch/s][A
Training batches on cuda:0:   4%|█▍                              | 741/16687 [00:01<00:28, 565.70batch/s][A
Training batches on cuda:0:   5%|█▌                              | 799/16687 [00:01<00:27, 567.46batch/s][A
Training batches on cuda:0:   5%|█▋                              | 857/16687 [00:01<00:27, 568.74batch/s][A
Training batches on cuda:0:   5%|█▊                              | 914/16687 [00:01<00:27, 566.43batch/s][A
Training batches on cuda:0:   6%|█▊                              | 971/16687 [00:01<00:27, 562.61batch/s][A
Training batches on cuda:0:   6%|█▉                             | 1030/16687 [00:01<00:27, 569.53batch/s][A
Training batches on cuda:0:   7%|██                             | 1088/16687 [00:02<00:27, 570.40batch/s][A
Training batches on

Training batches on cuda:0:  55%|█████████████████▏             | 9251/16687 [00:16<00:13, 559.19batch/s][A
Training batches on cuda:0:  56%|█████████████████▎             | 9309/16687 [00:16<00:13, 564.58batch/s][A
Training batches on cuda:0:  56%|█████████████████▍             | 9368/16687 [00:16<00:12, 571.67batch/s][A
Training batches on cuda:0:  56%|█████████████████▌             | 9426/16687 [00:16<00:12, 568.01batch/s][A
Training batches on cuda:0:  57%|█████████████████▌             | 9483/16687 [00:16<00:12, 564.82batch/s][A
Training batches on cuda:0:  57%|█████████████████▋             | 9542/16687 [00:17<00:12, 570.94batch/s][A
Training batches on cuda:0:  58%|█████████████████▊             | 9600/16687 [00:17<00:12, 567.17batch/s][A
Training batches on cuda:0:  58%|█████████████████▉             | 9657/16687 [00:17<00:12, 561.91batch/s][A
Training batches on cuda:0:  58%|██████████████████             | 9715/16687 [00:17<00:12, 565.40batch/s][A
Training batches on

Training batches on cuda:0:   6%|█▉                             | 1023/16687 [00:01<00:28, 554.25batch/s][A
Training batches on cuda:0:   6%|██                             | 1081/16687 [00:02<00:27, 560.01batch/s][A
Training batches on cuda:0:   7%|██                             | 1139/16687 [00:02<00:27, 564.03batch/s][A
Training batches on cuda:0:   7%|██▏                            | 1196/16687 [00:02<00:27, 562.70batch/s][A
Training batches on cuda:0:   8%|██▎                            | 1253/16687 [00:02<00:27, 561.21batch/s][A
Training batches on cuda:0:   8%|██▍                            | 1310/16687 [00:02<00:27, 557.64batch/s][A
Training batches on cuda:0:   8%|██▌                            | 1369/16687 [00:02<00:27, 565.41batch/s][A
Training batches on cuda:0:   9%|██▋                            | 1427/16687 [00:02<00:26, 566.99batch/s][A
Training batches on cuda:0:   9%|██▊                            | 1485/16687 [00:02<00:26, 570.03batch/s][A
Training batches on

Training batches on cuda:0:  58%|█████████████████▉             | 9634/16687 [00:17<00:12, 562.49batch/s][A
Training batches on cuda:0:  58%|██████████████████             | 9692/16687 [00:17<00:12, 565.12batch/s][A
Training batches on cuda:0:  58%|██████████████████             | 9750/16687 [00:17<00:12, 566.78batch/s][A
Training batches on cuda:0:  59%|██████████████████▏            | 9808/16687 [00:17<00:12, 568.40batch/s][A
Training batches on cuda:0:  59%|██████████████████▎            | 9865/16687 [00:17<00:12, 564.42batch/s][A
Training batches on cuda:0:  59%|██████████████████▍            | 9923/16687 [00:17<00:11, 566.58batch/s][A
Training batches on cuda:0:  60%|██████████████████▌            | 9981/16687 [00:17<00:11, 569.77batch/s][A
Training batches on cuda:0:  60%|██████████████████            | 10038/16687 [00:17<00:11, 554.87batch/s][A
Training batches on cuda:0:  60%|██████████████████▏           | 10094/16687 [00:18<00:11, 556.11batch/s][A
Training batches on

Training batches on cuda:0:   7%|██▏                            | 1199/16687 [00:02<00:27, 557.14batch/s][A
Training batches on cuda:0:   8%|██▎                            | 1255/16687 [00:02<00:27, 556.32batch/s][A
Training batches on cuda:0:   8%|██▍                            | 1312/16687 [00:02<00:27, 559.09batch/s][A
Training batches on cuda:0:   8%|██▌                            | 1370/16687 [00:02<00:27, 565.05batch/s][A
Training batches on cuda:0:   9%|██▋                            | 1429/16687 [00:02<00:26, 569.53batch/s][A
Training batches on cuda:0:   9%|██▊                            | 1487/16687 [00:02<00:26, 571.60batch/s][A
Training batches on cuda:0:   9%|██▊                            | 1546/16687 [00:02<00:26, 574.86batch/s][A
Training batches on cuda:0:  10%|██▉                            | 1605/16687 [00:02<00:26, 577.09batch/s][A
Training batches on cuda:0:  10%|███                            | 1664/16687 [00:03<00:25, 578.94batch/s][A
Training batches on

Training batches on cuda:0:  58%|██████████████████             | 9699/16687 [00:17<00:12, 563.00batch/s][A
Training batches on cuda:0:  58%|██████████████████▏            | 9757/16687 [00:17<00:12, 566.15batch/s][A
Training batches on cuda:0:  59%|██████████████████▏            | 9815/16687 [00:17<00:12, 568.89batch/s][A
Training batches on cuda:0:  59%|██████████████████▎            | 9873/16687 [00:17<00:11, 571.24batch/s][A
Training batches on cuda:0:  60%|██████████████████▍            | 9931/16687 [00:17<00:11, 572.71batch/s][A
Training batches on cuda:0:  60%|██████████████████▌            | 9990/16687 [00:18<00:11, 576.87batch/s][A
Training batches on cuda:0:  60%|██████████████████            | 10048/16687 [00:18<00:11, 577.15batch/s][A
Training batches on cuda:0:  61%|██████████████████▏           | 10106/16687 [00:18<00:11, 575.58batch/s][A
Training batches on cuda:0:  61%|██████████████████▎           | 10164/16687 [00:18<00:11, 575.12batch/s][A
Training batches on

Training batches on cuda:0:   9%|██▋                            | 1473/16687 [00:02<00:27, 562.93batch/s][A
Training batches on cuda:0:   9%|██▊                            | 1531/16687 [00:02<00:26, 567.80batch/s][A
Training batches on cuda:0:  10%|██▉                            | 1588/16687 [00:02<00:26, 563.23batch/s][A
Training batches on cuda:0:  10%|███                            | 1645/16687 [00:03<00:26, 561.91batch/s][A
Training batches on cuda:0:  10%|███▏                           | 1702/16687 [00:03<00:26, 563.01batch/s][A
Training batches on cuda:0:  11%|███▎                           | 1759/16687 [00:03<00:26, 563.73batch/s][A
Training batches on cuda:0:  11%|███▎                           | 1816/16687 [00:03<00:26, 561.41batch/s][A
Training batches on cuda:0:  11%|███▍                           | 1873/16687 [00:03<00:26, 560.72batch/s][A
Training batches on cuda:0:  12%|███▌                           | 1930/16687 [00:03<00:26, 558.02batch/s][A
Training batches on

Training batches on cuda:0:  60%|██████████████████            | 10024/16687 [00:18<00:12, 535.44batch/s][A
Training batches on cuda:0:  60%|██████████████████▏           | 10083/16687 [00:18<00:11, 550.45batch/s][A
Training batches on cuda:0:  61%|██████████████████▏           | 10142/16687 [00:18<00:11, 561.01batch/s][A
Training batches on cuda:0:  61%|██████████████████▎           | 10201/16687 [00:18<00:11, 568.49batch/s][A
Training batches on cuda:0:  61%|██████████████████▍           | 10260/16687 [00:18<00:11, 573.71batch/s][A
Training batches on cuda:0:  62%|██████████████████▌           | 10318/16687 [00:18<00:11, 568.27batch/s][A
Training batches on cuda:0:  62%|██████████████████▋           | 10375/16687 [00:18<00:11, 567.12batch/s][A
Training batches on cuda:0:  63%|██████████████████▊           | 10434/16687 [00:18<00:10, 571.36batch/s][A
Training batches on cuda:0:  63%|██████████████████▊           | 10492/16687 [00:18<00:10, 565.49batch/s][A
Training batches on

Training batches on cuda:0:  11%|███▎                           | 1779/16687 [00:03<00:25, 575.44batch/s][A
Training batches on cuda:0:  11%|███▍                           | 1838/16687 [00:03<00:25, 576.98batch/s][A
Training batches on cuda:0:  11%|███▌                           | 1896/16687 [00:03<00:26, 560.80batch/s][A
Training batches on cuda:0:  12%|███▋                           | 1953/16687 [00:03<00:26, 558.34batch/s][A
Training batches on cuda:0:  12%|███▋                           | 2009/16687 [00:03<00:27, 538.06batch/s][A
Training batches on cuda:0:  12%|███▊                           | 2066/16687 [00:03<00:26, 547.02batch/s][A
Training batches on cuda:0:  13%|███▉                           | 2121/16687 [00:03<00:26, 547.80batch/s][A
Training batches on cuda:0:  13%|████                           | 2177/16687 [00:03<00:26, 549.98batch/s][A
Training batches on cuda:0:  13%|████▏                          | 2233/16687 [00:04<00:26, 547.22batch/s][A
Training batches on

Training batches on cuda:0:  62%|██████████████████▍           | 10280/16687 [00:18<00:11, 540.32batch/s][A
Training batches on cuda:0:  62%|██████████████████▌           | 10335/16687 [00:18<00:11, 534.86batch/s][A
Training batches on cuda:0:  62%|██████████████████▋           | 10392/16687 [00:18<00:11, 542.46batch/s][A
Training batches on cuda:0:  63%|██████████████████▊           | 10449/16687 [00:18<00:11, 548.07batch/s][A
Training batches on cuda:0:  63%|██████████████████▉           | 10505/16687 [00:19<00:11, 550.02batch/s][A
Training batches on cuda:0:  63%|██████████████████▉           | 10561/16687 [00:19<00:11, 551.77batch/s][A
Training batches on cuda:0:  64%|███████████████████           | 10619/16687 [00:19<00:10, 557.94batch/s][A
Training batches on cuda:0:  64%|███████████████████▏          | 10678/16687 [00:19<00:10, 565.89batch/s][A
Training batches on cuda:0:  64%|███████████████████▎          | 10735/16687 [00:19<00:10, 559.10batch/s][A
Training batches on

Training batches on cuda:0:   9%|██▋                            | 1447/16687 [00:03<00:31, 491.38batch/s][A
Training batches on cuda:0:   9%|██▊                            | 1499/16687 [00:03<00:30, 498.82batch/s][A
Training batches on cuda:0:   9%|██▉                            | 1549/16687 [00:03<00:33, 449.99batch/s][A
Training batches on cuda:0:  10%|██▉                            | 1596/16687 [00:03<00:33, 453.45batch/s][A
Training batches on cuda:0:  10%|███                            | 1643/16687 [00:03<00:37, 401.08batch/s][A
Training batches on cuda:0:  10%|███▏                           | 1695/16687 [00:03<00:34, 431.55batch/s][A
Training batches on cuda:0:  10%|███▏                           | 1743/16687 [00:03<00:33, 443.50batch/s][A
Training batches on cuda:0:  11%|███▎                           | 1801/16687 [00:03<00:30, 480.34batch/s][A
Training batches on cuda:0:  11%|███▍                           | 1851/16687 [00:04<00:31, 467.64batch/s][A
Training batches on

Training batches on cuda:0:  54%|████████████████▊              | 9068/16687 [00:19<00:17, 442.88batch/s][A
Training batches on cuda:0:  55%|████████████████▉              | 9114/16687 [00:19<00:16, 447.45batch/s][A
Training batches on cuda:0:  55%|█████████████████              | 9159/16687 [00:19<00:17, 435.15batch/s][A
Training batches on cuda:0:  55%|█████████████████              | 9209/16687 [00:19<00:16, 453.56batch/s][A
Training batches on cuda:0:  55%|█████████████████▏             | 9255/16687 [00:19<00:16, 453.17batch/s][A
Training batches on cuda:0:  56%|█████████████████▎             | 9309/16687 [00:19<00:15, 475.78batch/s][A
Training batches on cuda:0:  56%|█████████████████▍             | 9357/16687 [00:19<00:16, 449.72batch/s][A
Training batches on cuda:0:  56%|█████████████████▍             | 9403/16687 [00:19<00:16, 451.43batch/s][A
Training batches on cuda:0:  57%|█████████████████▌             | 9449/16687 [00:19<00:16, 446.09batch/s][A
Training batches on

Training epochs on cuda:0:  45%|████▌     | 9/20 [05:21<06:27, 35.24s/epoch, loss=0.651, prev_loss=0.651][A
Training batches on cuda:0:   0%|                                           | 0/16687 [00:00<?, ?batch/s][A
Training batches on cuda:0:   0%|                                  | 10/16687 [00:00<02:48, 99.10batch/s][A
Training batches on cuda:0:   0%|▏                                | 64/16687 [00:00<00:46, 355.46batch/s][A
Training batches on cuda:0:   1%|▏                               | 107/16687 [00:00<00:42, 385.90batch/s][A
Training batches on cuda:0:   1%|▎                               | 159/16687 [00:00<00:37, 438.34batch/s][A
Training batches on cuda:0:   1%|▍                               | 210/16687 [00:00<00:35, 463.79batch/s][A
Training batches on cuda:0:   2%|▍                               | 258/16687 [00:00<00:35, 468.70batch/s][A
Training batches on cuda:0:   2%|▌                               | 313/16687 [00:00<00:33, 493.69batch/s][A
Training batches on

Training batches on cuda:0:  45%|█████████████▊                 | 7427/16687 [00:15<00:19, 478.53batch/s][A
Training batches on cuda:0:  45%|█████████████▉                 | 7484/16687 [00:15<00:18, 503.61batch/s][A
Training batches on cuda:0:  45%|█████████████▉                 | 7535/16687 [00:15<00:18, 490.46batch/s][A
Training batches on cuda:0:  45%|██████████████                 | 7591/16687 [00:16<00:17, 509.89batch/s][A
Training batches on cuda:0:  46%|██████████████▏                | 7643/16687 [00:16<00:18, 487.82batch/s][A
Training batches on cuda:0:  46%|██████████████▎                | 7696/16687 [00:16<00:18, 499.09batch/s][A
Training batches on cuda:0:  46%|██████████████▍                | 7747/16687 [00:16<00:18, 494.09batch/s][A
Training batches on cuda:0:  47%|██████████████▍                | 7800/16687 [00:16<00:17, 503.70batch/s][A
Training batches on cuda:0:  47%|██████████████▌                | 7852/16687 [00:16<00:17, 508.14batch/s][A
Training batches on

Training batches on cuda:0:  95%|████████████████████████████▍ | 15819/16687 [00:31<00:01, 539.37batch/s][A
Training batches on cuda:0:  95%|████████████████████████████▌ | 15877/16687 [00:31<00:01, 548.57batch/s][A
Training batches on cuda:0:  95%|████████████████████████████▋ | 15934/16687 [00:31<00:01, 553.83batch/s][A
Training batches on cuda:0:  96%|████████████████████████████▋ | 15991/16687 [00:31<00:01, 558.08batch/s][A
Training batches on cuda:0:  96%|████████████████████████████▊ | 16048/16687 [00:31<00:01, 559.66batch/s][A
Training batches on cuda:0:  97%|████████████████████████████▉ | 16105/16687 [00:31<00:01, 562.18batch/s][A
Training batches on cuda:0:  97%|█████████████████████████████ | 16162/16687 [00:31<00:00, 563.98batch/s][A
Training batches on cuda:0:  97%|█████████████████████████████▏| 16219/16687 [00:31<00:00, 561.69batch/s][A
Training batches on cuda:0:  98%|█████████████████████████████▎| 16276/16687 [00:31<00:00, 563.02batch/s][A
Training batches on

Training batches on cuda:0:  43%|█████████████▎                 | 7174/16687 [00:12<00:16, 559.81batch/s][A
Training batches on cuda:0:  43%|█████████████▍                 | 7230/16687 [00:13<00:17, 556.19batch/s][A
Training batches on cuda:0:  44%|█████████████▌                 | 7287/16687 [00:13<00:16, 557.93batch/s][A
Training batches on cuda:0:  44%|█████████████▋                 | 7344/16687 [00:13<00:16, 559.28batch/s][A
Training batches on cuda:0:  44%|█████████████▋                 | 7401/16687 [00:13<00:16, 560.76batch/s][A
Training batches on cuda:0:  45%|█████████████▊                 | 7458/16687 [00:13<00:16, 561.67batch/s][A
Training batches on cuda:0:  45%|█████████████▉                 | 7515/16687 [00:13<00:16, 562.24batch/s][A
Training batches on cuda:0:  45%|██████████████                 | 7572/16687 [00:13<00:16, 562.32batch/s][A
Training batches on cuda:0:  46%|██████████████▏                | 7629/16687 [00:13<00:16, 562.79batch/s][A
Training batches on

Training batches on cuda:0:  94%|████████████████████████████▎ | 15717/16687 [00:28<00:01, 561.81batch/s][A
Training batches on cuda:0:  95%|████████████████████████████▎ | 15774/16687 [00:28<00:01, 563.89batch/s][A
Training batches on cuda:0:  95%|████████████████████████████▍ | 15831/16687 [00:28<00:01, 564.92batch/s][A
Training batches on cuda:0:  95%|████████████████████████████▌ | 15888/16687 [00:28<00:01, 561.20batch/s][A
Training batches on cuda:0:  96%|████████████████████████████▋ | 15945/16687 [00:28<00:01, 558.30batch/s][A
Training batches on cuda:0:  96%|████████████████████████████▊ | 16001/16687 [00:28<00:01, 557.55batch/s][A
Training batches on cuda:0:  96%|████████████████████████████▊ | 16058/16687 [00:28<00:01, 561.03batch/s][A
Training batches on cuda:0:  97%|████████████████████████████▉ | 16115/16687 [00:28<00:01, 559.49batch/s][A
Training batches on cuda:0:  97%|█████████████████████████████ | 16171/16687 [00:28<00:00, 558.41batch/s][A
Training batches on

Training batches on cuda:0:  45%|█████████████▉                 | 7478/16687 [00:13<00:16, 568.58batch/s][A
Training batches on cuda:0:  45%|█████████████▉                 | 7536/16687 [00:13<00:16, 571.23batch/s][A
Training batches on cuda:0:  46%|██████████████                 | 7594/16687 [00:13<00:15, 569.82batch/s][A
Training batches on cuda:0:  46%|██████████████▏                | 7652/16687 [00:13<00:15, 572.09batch/s][A
Training batches on cuda:0:  46%|██████████████▎                | 7710/16687 [00:13<00:15, 570.35batch/s][A
Training batches on cuda:0:  47%|██████████████▍                | 7768/16687 [00:13<00:15, 564.94batch/s][A
Training batches on cuda:0:  47%|██████████████▌                | 7825/16687 [00:13<00:15, 563.37batch/s][A
Training batches on cuda:0:  47%|██████████████▋                | 7882/16687 [00:14<00:15, 561.23batch/s][A
Training batches on cuda:0:  48%|██████████████▋                | 7939/16687 [00:14<00:15, 562.95batch/s][A
Training batches on

Training batches on cuda:0:  97%|█████████████████████████████ | 16134/16687 [00:28<00:00, 555.18batch/s][A
Training batches on cuda:0:  97%|█████████████████████████████ | 16191/16687 [00:28<00:00, 557.12batch/s][A
Training batches on cuda:0:  97%|█████████████████████████████▏| 16247/16687 [00:28<00:00, 553.98batch/s][A
Training batches on cuda:0:  98%|█████████████████████████████▎| 16303/16687 [00:28<00:00, 554.63batch/s][A
Training batches on cuda:0:  98%|█████████████████████████████▍| 16359/16687 [00:29<00:00, 554.42batch/s][A
Training batches on cuda:0:  98%|█████████████████████████████▌| 16417/16687 [00:29<00:00, 560.42batch/s][A
Training batches on cuda:0:  99%|█████████████████████████████▌| 16475/16687 [00:29<00:00, 565.08batch/s][A
Training batches on cuda:0:  99%|█████████████████████████████▋| 16534/16687 [00:29<00:00, 571.63batch/s][A
Training batches on cuda:0:  99%|█████████████████████████████▊| 16593/16687 [00:29<00:00, 576.25batch/s][A
Training batches on

Training batches on cuda:0:  47%|██████████████▌                | 7868/16687 [00:14<00:15, 562.19batch/s][A
Training batches on cuda:0:  47%|██████████████▋                | 7925/16687 [00:14<00:15, 563.20batch/s][A
Training batches on cuda:0:  48%|██████████████▊                | 7982/16687 [00:14<00:15, 554.46batch/s][A
Training batches on cuda:0:  48%|██████████████▉                | 8038/16687 [00:14<00:15, 552.62batch/s][A
Training batches on cuda:0:  49%|███████████████                | 8095/16687 [00:14<00:15, 556.39batch/s][A
Training batches on cuda:0:  49%|███████████████▏               | 8152/16687 [00:14<00:15, 558.82batch/s][A
Training batches on cuda:0:  49%|███████████████▎               | 8209/16687 [00:14<00:15, 560.78batch/s][A
Training batches on cuda:0:  50%|███████████████▎               | 8266/16687 [00:14<00:14, 562.01batch/s][A
Training batches on cuda:0:  50%|███████████████▍               | 8323/16687 [00:14<00:14, 562.37batch/s][A
Training batches on

Training batches on cuda:0:  99%|█████████████████████████████▌| 16475/16687 [00:29<00:00, 541.44batch/s][A
Training batches on cuda:0:  99%|█████████████████████████████▋| 16533/16687 [00:29<00:00, 549.97batch/s][A
Training batches on cuda:0:  99%|█████████████████████████████▊| 16590/16687 [00:29<00:00, 555.12batch/s][A
Training batches on cuda:0: 100%|█████████████████████████████▉| 16646/16687 [00:29<00:00, 553.90batch/s][A
Training epochs on cuda:0:  65%|██████▌   | 13/20 [08:21<04:21, 37.42s/epoch, loss=0.651, prev_loss=0.65][A
Training batches on cuda:0:   0%|                                           | 0/16687 [00:00<?, ?batch/s][A
Training batches on cuda:0:   0%|                                 | 16/16687 [00:00<01:44, 159.21batch/s][A
Training batches on cuda:0:   0%|▏                                | 66/16687 [00:00<00:46, 359.26batch/s][A
Training batches on cuda:0:   1%|▏                               | 119/16687 [00:00<00:38, 434.03batch/s][A
Training batches on

Training batches on cuda:0:  49%|███████████████▎               | 8212/16687 [00:14<00:15, 553.55batch/s][A
Training batches on cuda:0:  50%|███████████████▎               | 8269/16687 [00:14<00:15, 555.64batch/s][A
Training batches on cuda:0:  50%|███████████████▍               | 8326/16687 [00:14<00:14, 558.24batch/s][A
Training batches on cuda:0:  50%|███████████████▌               | 8382/16687 [00:15<00:15, 541.29batch/s][A
Training batches on cuda:0:  51%|███████████████▋               | 8439/16687 [00:15<00:15, 547.71batch/s][A
Training batches on cuda:0:  51%|███████████████▊               | 8497/16687 [00:15<00:14, 554.89batch/s][A
Training batches on cuda:0:  51%|███████████████▉               | 8553/16687 [00:15<00:14, 548.74batch/s][A
Training batches on cuda:0:  52%|███████████████▉               | 8612/16687 [00:15<00:14, 560.07batch/s][A
Training batches on cuda:0:  52%|████████████████               | 8669/16687 [00:15<00:14, 560.91batch/s][A
Training batches on

Training epochs on cuda:0:  70%|███████   | 14/20 [08:51<03:31, 35.26s/epoch, loss=0.65, prev_loss=0.651][A
Training batches on cuda:0:   0%|                                           | 0/16687 [00:00<?, ?batch/s][A
Training batches on cuda:0:   0%|                                 | 15/16687 [00:00<01:51, 149.97batch/s][A
Training batches on cuda:0:   0%|▏                                | 68/16687 [00:00<00:46, 361.07batch/s][A
Training batches on cuda:0:   1%|▏                               | 119/16687 [00:00<00:38, 427.56batch/s][A
Training batches on cuda:0:   1%|▎                               | 174/16687 [00:00<00:34, 474.12batch/s][A
Training batches on cuda:0:   1%|▍                               | 227/16687 [00:00<00:33, 491.27batch/s][A
Training batches on cuda:0:   2%|▌                               | 277/16687 [00:00<00:33, 491.10batch/s][A
Training batches on cuda:0:   2%|▋                               | 333/16687 [00:00<00:31, 512.44batch/s][A
Training batches on

Training batches on cuda:0:  50%|███████████████▋               | 8413/16687 [00:15<00:14, 553.13batch/s][A
Training batches on cuda:0:  51%|███████████████▋               | 8469/16687 [00:15<00:14, 548.18batch/s][A
Training batches on cuda:0:  51%|███████████████▊               | 8526/16687 [00:15<00:14, 553.52batch/s][A
Training batches on cuda:0:  51%|███████████████▉               | 8584/16687 [00:15<00:14, 559.65batch/s][A
Training batches on cuda:0:  52%|████████████████               | 8640/16687 [00:15<00:14, 545.72batch/s][A
Training batches on cuda:0:  52%|████████████████▏              | 8697/16687 [00:15<00:14, 551.48batch/s][A
Training batches on cuda:0:  52%|████████████████▎              | 8753/16687 [00:15<00:14, 546.27batch/s][A
Training batches on cuda:0:  53%|████████████████▎              | 8808/16687 [00:15<00:14, 536.06batch/s][A
Training batches on cuda:0:  53%|████████████████▍              | 8862/16687 [00:16<00:14, 531.87batch/s][A
Training batches on

Training batches on cuda:0:   0%|                                 | 18/16687 [00:00<01:33, 179.23batch/s][A
Training batches on cuda:0:   0%|▏                                | 76/16687 [00:00<00:40, 412.45batch/s][A
Training batches on cuda:0:   1%|▏                               | 127/16687 [00:00<00:36, 454.56batch/s][A
Training batches on cuda:0:   1%|▎                               | 184/16687 [00:00<00:33, 498.68batch/s][A
Training batches on cuda:0:   1%|▍                               | 240/16687 [00:00<00:31, 518.63batch/s][A
Training batches on cuda:0:   2%|▌                               | 297/16687 [00:00<00:30, 535.97batch/s][A
Training batches on cuda:0:   2%|▋                               | 353/16687 [00:00<00:30, 541.64batch/s][A
Training batches on cuda:0:   2%|▊                               | 410/16687 [00:00<00:29, 549.48batch/s][A
Training batches on cuda:0:   3%|▉                               | 467/16687 [00:00<00:29, 555.76batch/s][A
Training batches on

Training batches on cuda:0:  51%|███████████████▉               | 8571/16687 [00:15<00:14, 565.82batch/s][A
Training batches on cuda:0:  52%|████████████████               | 8628/16687 [00:15<00:14, 566.55batch/s][A
Training batches on cuda:0:  52%|████████████████▏              | 8685/16687 [00:15<00:14, 553.07batch/s][A
Training batches on cuda:0:  52%|████████████████▏              | 8741/16687 [00:15<00:14, 553.46batch/s][A
Training batches on cuda:0:  53%|████████████████▎              | 8798/16687 [00:15<00:14, 556.11batch/s][A
Training batches on cuda:0:  53%|████████████████▍              | 8855/16687 [00:15<00:14, 557.86batch/s][A
Training batches on cuda:0:  53%|████████████████▌              | 8912/16687 [00:15<00:13, 559.45batch/s][A
Training batches on cuda:0:  54%|████████████████▋              | 8968/16687 [00:16<00:13, 551.59batch/s][A
Training batches on cuda:0:  54%|████████████████▊              | 9024/16687 [00:16<00:13, 554.05batch/s][A
Training batches on

Training batches on cuda:0:   1%|▍                               | 236/16687 [00:00<00:31, 519.88batch/s][A
Training batches on cuda:0:   2%|▌                               | 291/16687 [00:00<00:30, 529.43batch/s][A
Training batches on cuda:0:   2%|▋                               | 348/16687 [00:00<00:30, 540.71batch/s][A
Training batches on cuda:0:   2%|▊                               | 403/16687 [00:00<00:29, 543.11batch/s][A
Training batches on cuda:0:   3%|▉                               | 458/16687 [00:00<00:29, 544.49batch/s][A
Training batches on cuda:0:   3%|▉                               | 513/16687 [00:01<00:29, 545.83batch/s][A
Training batches on cuda:0:   3%|█                               | 570/16687 [00:01<00:29, 550.73batch/s][A
Training batches on cuda:0:   4%|█▏                              | 626/16687 [00:01<00:29, 553.50batch/s][A
Training batches on cuda:0:   4%|█▎                              | 682/16687 [00:01<00:28, 552.30batch/s][A
Training batches on

Training batches on cuda:0:  53%|████████████████▍              | 8828/16687 [00:15<00:13, 568.27batch/s][A
Training batches on cuda:0:  53%|████████████████▌              | 8885/16687 [00:15<00:13, 564.74batch/s][A
Training batches on cuda:0:  54%|████████████████▌              | 8942/16687 [00:16<00:13, 562.34batch/s][A
Training batches on cuda:0:  54%|████████████████▋              | 8999/16687 [00:16<00:13, 564.36batch/s][A
Training batches on cuda:0:  54%|████████████████▊              | 9056/16687 [00:16<00:13, 565.80batch/s][A
Training batches on cuda:0:  55%|████████████████▉              | 9113/16687 [00:16<00:13, 566.69batch/s][A
Training batches on cuda:0:  55%|█████████████████              | 9170/16687 [00:16<00:13, 557.78batch/s][A
Training batches on cuda:0:  55%|█████████████████▏             | 9228/16687 [00:16<00:13, 562.13batch/s][A
Training batches on cuda:0:  56%|█████████████████▎             | 9286/16687 [00:16<00:13, 566.43batch/s][A
Training batches on

Training batches on cuda:0:   4%|█▏                              | 623/16687 [00:01<00:28, 554.53batch/s][A
Training batches on cuda:0:   4%|█▎                              | 680/16687 [00:01<00:28, 558.63batch/s][A
Training batches on cuda:0:   4%|█▍                              | 736/16687 [00:01<00:28, 557.34batch/s][A
Training batches on cuda:0:   5%|█▌                              | 792/16687 [00:01<00:28, 556.34batch/s][A
Training batches on cuda:0:   5%|█▋                              | 848/16687 [00:01<00:28, 555.65batch/s][A
Training batches on cuda:0:   5%|█▋                              | 904/16687 [00:01<00:28, 555.82batch/s][A
Training batches on cuda:0:   6%|█▊                              | 960/16687 [00:01<00:28, 548.71batch/s][A
Training batches on cuda:0:   6%|█▉                             | 1017/16687 [00:01<00:28, 552.50batch/s][A
Training batches on cuda:0:   6%|█▉                             | 1074/16687 [00:02<00:28, 556.12batch/s][A
Training batches on

Training batches on cuda:0:  55%|█████████████████              | 9173/16687 [00:16<00:13, 561.27batch/s][A
Training batches on cuda:0:  55%|█████████████████▏             | 9231/16687 [00:16<00:13, 564.07batch/s][A
Training batches on cuda:0:  56%|█████████████████▎             | 9288/16687 [00:16<00:13, 557.60batch/s][A
Training batches on cuda:0:  56%|█████████████████▎             | 9344/16687 [00:16<00:13, 550.09batch/s][A
Training batches on cuda:0:  56%|█████████████████▍             | 9400/16687 [00:16<00:13, 548.24batch/s][A
Training batches on cuda:0:  57%|█████████████████▌             | 9455/16687 [00:16<00:13, 542.73batch/s][A
Training batches on cuda:0:  57%|█████████████████▋             | 9510/16687 [00:17<00:13, 532.91batch/s][A
Training batches on cuda:0:  57%|█████████████████▊             | 9565/16687 [00:17<00:13, 536.73batch/s][A
Training batches on cuda:0:  58%|█████████████████▊             | 9619/16687 [00:17<00:13, 528.79batch/s][A
Training batches on

Training batches on cuda:0:   5%|█▋                              | 863/16687 [00:01<00:28, 560.36batch/s][A
Training batches on cuda:0:   6%|█▊                              | 920/16687 [00:01<00:28, 561.95batch/s][A
Training batches on cuda:0:   6%|█▊                              | 977/16687 [00:01<00:27, 563.35batch/s][A
Training batches on cuda:0:   6%|█▉                             | 1034/16687 [00:01<00:27, 564.47batch/s][A
Training batches on cuda:0:   7%|██                             | 1091/16687 [00:02<00:27, 565.04batch/s][A
Training batches on cuda:0:   7%|██▏                            | 1148/16687 [00:02<00:27, 565.14batch/s][A
Training batches on cuda:0:   7%|██▏                            | 1206/16687 [00:02<00:27, 568.50batch/s][A
Training batches on cuda:0:   8%|██▎                            | 1263/16687 [00:02<00:27, 559.45batch/s][A
Training batches on cuda:0:   8%|██▍                            | 1320/16687 [00:02<00:27, 559.94batch/s][A
Training batches on

Training batches on cuda:0:  57%|█████████████████▌             | 9457/16687 [00:16<00:12, 567.15batch/s][A
Training batches on cuda:0:  57%|█████████████████▋             | 9514/16687 [00:17<00:12, 559.56batch/s][A
Training batches on cuda:0:  57%|█████████████████▊             | 9570/16687 [00:17<00:13, 543.09batch/s][A
Training batches on cuda:0:  58%|█████████████████▉             | 9626/16687 [00:17<00:12, 545.35batch/s][A
Training batches on cuda:0:  58%|█████████████████▉             | 9683/16687 [00:17<00:12, 551.64batch/s][A
Training batches on cuda:0:  58%|██████████████████             | 9739/16687 [00:17<00:12, 545.41batch/s][A
Training batches on cuda:0:  59%|██████████████████▏            | 9794/16687 [00:17<00:12, 542.61batch/s][A
Training batches on cuda:0:  59%|██████████████████▎            | 9851/16687 [00:17<00:12, 549.61batch/s][A
Training batches on cuda:0:  59%|██████████████████▍            | 9907/16687 [00:17<00:12, 549.96batch/s][A
Training batches on

Training batches on cuda:0:   7%|██▏                            | 1206/16687 [00:02<00:27, 562.26batch/s][A
Training batches on cuda:0:   8%|██▎                            | 1263/16687 [00:02<00:27, 563.37batch/s][A
Training batches on cuda:0:   8%|██▍                            | 1320/16687 [00:02<00:27, 565.19batch/s][A
Training batches on cuda:0:   8%|██▌                            | 1377/16687 [00:02<00:27, 566.32batch/s][A
Training batches on cuda:0:   9%|██▋                            | 1435/16687 [00:02<00:26, 568.60batch/s][A
Training batches on cuda:0:   9%|██▊                            | 1492/16687 [00:02<00:26, 565.93batch/s][A
Training batches on cuda:0:   9%|██▉                            | 1549/16687 [00:02<00:26, 561.12batch/s][A
Training batches on cuda:0:  10%|██▉                            | 1606/16687 [00:02<00:26, 562.52batch/s][A
Training batches on cuda:0:  10%|███                            | 1663/16687 [00:03<00:26, 562.08batch/s][A
Training batches on

KeyboardInterrupt: 

In [11]:
hpo_pipeline_result.save_to_directory('hpo_results')

NameError: name 'hpo_pipeline_result' is not defined

### Example from OGB

In [22]:
class GCN(torch.nn.Module):
    def __init__(self, in_channels, hidden_channels, out_channels, num_layers,
                 dropout):
        super(GCN, self).__init__()

        self.convs = torch.nn.ModuleList()
        self.convs.append(GCNConv(in_channels, hidden_channels, cached=True))
        for _ in range(num_layers - 2):
            self.convs.append(
                GCNConv(hidden_channels, hidden_channels, cached=True))
        self.convs.append(GCNConv(hidden_channels, out_channels, cached=True))

        self.dropout = dropout

    def reset_parameters(self):
        for conv in self.convs:
            conv.reset_parameters()

    def forward(self, x, adj_t):
        for conv in self.convs[:-1]:
            x = conv(x, adj_t)
            x = F.relu(x)
            x = F.dropout(x, p=self.dropout, training=self.training)
        x = self.convs[-1](x, adj_t)
        return x

In [23]:
class LinkPredictor(torch.nn.Module):
    def __init__(self, in_channels, hidden_channels, out_channels, num_layers,
                 dropout):
        super(LinkPredictor, self).__init__()

        self.lins = torch.nn.ModuleList()
        self.lins.append(torch.nn.Linear(in_channels, hidden_channels))
        for _ in range(num_layers - 2):
            self.lins.append(torch.nn.Linear(hidden_channels, hidden_channels))
        self.lins.append(torch.nn.Linear(hidden_channels, out_channels))

        self.dropout = dropout

    def reset_parameters(self):
        for lin in self.lins:
            lin.reset_parameters()

    def forward(self, x_i, x_j):
        x = x_i * x_j
        for lin in self.lins[:-1]:
            x = lin(x)
            x = F.relu(x)
            x = F.dropout(x, p=self.dropout, training=self.training)
        x = self.lins[-1](x)
        return torch.sigmoid(x)

In [24]:
def train(model, predictor, x, adj_t, split_edge, optimizer, batch_size):

    row, col, _ = adj_t.coo()
    edge_index = torch.stack([col, row], dim=0)

    model.train()
    predictor.train()

    pos_train_edge = split_edge['train']['edge'].to(x.device)

    total_loss = total_examples = 0
    for perm in DataLoader(range(pos_train_edge.size(0)), batch_size,
                           shuffle=True):
        optimizer.zero_grad()

        h = model(x, adj_t)
#         print('h:', h)

        edge = pos_train_edge[perm].t()
#         print('Train: edge:', edge)
#         print()
#         print('h[edge[0]]:', h[edge[0]])
#         print()
#         print('h[edge[1]]:', h[edge[1]])
#         print()

        pos_out = predictor(h[edge[0]], h[edge[1]])
        pos_loss = -torch.log(pos_out + 1e-15).mean()
#         print('pos out:', pos_out)

        edge = negative_sampling(edge_index, num_nodes=x.size(0),
                                 num_neg_samples=perm.size(0), method='dense')

        neg_out = predictor(h[edge[0]], h[edge[1]])
        neg_loss = -torch.log(1 - neg_out + 1e-15).mean()

        loss = pos_loss + neg_loss
        loss.backward()

        torch.nn.utils.clip_grad_norm_(x, 1.0)
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        torch.nn.utils.clip_grad_norm_(predictor.parameters(), 1.0)

        optimizer.step()

        num_examples = pos_out.size(0)
        total_loss += loss.item() * num_examples
        total_examples += num_examples

    return total_loss / total_examples

In [29]:
@torch.no_grad()
def test(model, predictor, x, adj_t, split_edge, evaluator, batch_size):
#     print('test')
    
    model.eval()
    predictor.eval()

    h = model(x, adj_t)

    pos_train_edge = split_edge['eval_train']['edge'].to(x.device)
    pos_valid_edge = split_edge['valid']['edge'].to(x.device)
    neg_valid_edge = split_edge['valid']['edge_neg'].to(x.device)
    pos_test_edge = split_edge['test']['edge'].to(x.device)
    neg_test_edge = split_edge['test']['edge_neg'].to(x.device)

    pos_train_preds = []
    for perm in DataLoader(range(pos_train_edge.size(0)), batch_size):
        edge = pos_train_edge[perm].t()
        pos_train_preds += [predictor(h[edge[0]], h[edge[1]]).squeeze().cpu()]
    pos_train_pred = torch.cat(pos_train_preds, dim=0)

    pos_valid_preds = []
    for perm in DataLoader(range(pos_valid_edge.size(0)), batch_size):
        edge = pos_valid_edge[perm].t()
        pos_valid_preds += [predictor(h[edge[0]], h[edge[1]]).squeeze().cpu()]
    pos_valid_pred = torch.cat(pos_valid_preds, dim=0)

    neg_valid_preds = []
    for perm in DataLoader(range(neg_valid_edge.size(0)), batch_size):
        edge = neg_valid_edge[perm].t()
        neg_valid_preds += [predictor(h[edge[0]], h[edge[1]]).squeeze().cpu()]
    neg_valid_pred = torch.cat(neg_valid_preds, dim=0)

    pos_test_preds = []
    for perm in DataLoader(range(pos_test_edge.size(0)), batch_size):
        edge = pos_test_edge[perm].t()
        pos_test_preds += [predictor(h[edge[0]], h[edge[1]]).squeeze().cpu()]
    pos_test_pred = torch.cat(pos_test_preds, dim=0)

    neg_test_preds = []
    for perm in DataLoader(range(neg_test_edge.size(0)), batch_size):
        edge = neg_test_edge[perm].t()
        neg_test_preds += [predictor(h[edge[0]], h[edge[1]]).squeeze().cpu()]
    neg_test_pred = torch.cat(neg_test_preds, dim=0)
    
#     print('pos_train_pred:', pos_train_pred)
#     print('neg_train_pred:', neg_valid_pred)
#     print()

    results = {}
    for K in [10, 20, 30]:
        evaluator.K = K
        train_hits = evaluator.eval({
            'y_pred_pos': pos_train_pred,
            'y_pred_neg': neg_valid_pred,
        })[f'hits@{K}']
        valid_hits = evaluator.eval({
            'y_pred_pos': pos_valid_pred,
            'y_pred_neg': neg_valid_pred,
        })[f'hits@{K}']
        test_hits = evaluator.eval({
            'y_pred_pos': pos_test_pred,
            'y_pred_neg': neg_test_pred,
        })[f'hits@{K}']

        results[f'Hits@{K}'] = (train_hits, valid_hits, test_hits)

    return results

In [30]:
hidden_channels = 256
num_layers = 2
dropout = 0.5
runs = 4
lr = 0.005
batch_size = 64 * 1024
epochs = 5
log_steps = 1
eval_steps = 1

device = 'cuda' if torch.cuda.is_available() else 'cpu'
device = torch.device(device)

dataset = PygLinkPropPredDataset(name='ogbl-ddi', transform=T.ToSparseTensor())
data = dataset[0]
adj_t = data.adj_t.to(device)

split_edge = dataset.get_edge_split()

# We randomly pick some training samples that we want to evaluate on:
torch.manual_seed(12345)
idx = torch.randperm(split_edge['train']['edge'].size(0))
idx = idx[:split_edge['valid']['edge'].size(0)]
split_edge['eval_train'] = {'edge': split_edge['train']['edge'][idx]}


model = GCN(hidden_channels, hidden_channels,
                hidden_channels, num_layers,
                dropout).to(device)

emb = torch.nn.Embedding(data.adj_t.size(0),
                         hidden_channels).to(device)

print('Embedding:', emb)
print()
predictor = LinkPredictor(hidden_channels, hidden_channels, 1,
                          num_layers, dropout).to(device)

evaluator = Evaluator(name='ogbl-ddi')
# loggers = {
#     'Hits@10': Logger(args.runs, args),
#     'Hits@20': Logger(args.runs, args),
#     'Hits@30': Logger(args.runs, args),
# }

for run in range(runs):
    torch.nn.init.xavier_uniform_(emb.weight)
#     print('Weights:', emb.weight)
#     print()
    model.reset_parameters()
    predictor.reset_parameters()
    optimizer = torch.optim.Adam(
        list(model.parameters()) + list(emb.parameters()) +
        list(predictor.parameters()), lr=lr)

    for epoch in range(1, 1 + epochs):
        loss = train(model, predictor, emb.weight, adj_t, split_edge,
                     optimizer, batch_size)

        if epoch % eval_steps == 0:
#             print('Eval')
            results = test(model, predictor, emb.weight, adj_t, split_edge,
                           evaluator, batch_size)
#             for key, result in results.items():
#                 loggers[key].add_result(run, result)

            if epoch % log_steps == 0:
                for key, result in results.items():
                    train_hits, valid_hits, test_hits = result
                    print(key)
                    print(f'Run: {run + 1:02d}, '
                          f'Epoch: {epoch:02d}, '
                          f'Loss: {loss:.4f}, '
                          f'Train: {100 * train_hits:.2f}%, '
                          f'Valid: {100 * valid_hits:.2f}%, '
                          f'Test: {100 * test_hits:.2f}%')
                print('---')

#     for key in loggers.keys():
#         print(key)
#         loggers[key].print_statistics(run)

# for key in loggers.keys():
#     print(key)
#     loggers[key].print_statistics()

Embedding: Embedding(4267, 256)

Hits@10
Run: 01, Epoch: 01, Loss: 1.2921, Train: 0.03%, Valid: 0.02%, Test: 0.01%
Hits@20
Run: 01, Epoch: 01, Loss: 1.2921, Train: 3.96%, Valid: 3.64%, Test: 2.58%
Hits@30
Run: 01, Epoch: 01, Loss: 1.2921, Train: 4.55%, Valid: 4.17%, Test: 3.92%
---
Hits@10
Run: 01, Epoch: 02, Loss: 0.9923, Train: 3.10%, Valid: 2.87%, Test: 5.68%
Hits@20
Run: 01, Epoch: 02, Loss: 0.9923, Train: 4.74%, Valid: 4.37%, Test: 6.92%
Hits@30
Run: 01, Epoch: 02, Loss: 0.9923, Train: 5.45%, Valid: 5.01%, Test: 7.74%
---
Hits@10
Run: 01, Epoch: 03, Loss: 0.8506, Train: 0.26%, Valid: 0.24%, Test: 0.03%
Hits@20
Run: 01, Epoch: 03, Loss: 0.8506, Train: 0.56%, Valid: 0.51%, Test: 0.08%
Hits@30
Run: 01, Epoch: 03, Loss: 0.8506, Train: 0.75%, Valid: 0.70%, Test: 0.15%
---
Hits@10
Run: 01, Epoch: 04, Loss: 0.7528, Train: 2.34%, Valid: 2.15%, Test: 3.71%
Hits@20
Run: 01, Epoch: 04, Loss: 0.7528, Train: 3.19%, Valid: 2.99%, Test: 4.94%
Hits@30
Run: 01, Epoch: 04, Loss: 0.7528, Train: 4.13