In [1]:
import torch

def format_pytorch_version(version):
  """Return the release part of a PyTorch version string.

  Everything from the first ``+`` onwards (the local build tag, e.g.
  ``+cu113``) is dropped; a string without ``+`` comes back unchanged.
  """
  release, _, _ = version.partition('+')
  return release

TORCH_version = torch.__version__
TORCH = format_pytorch_version(TORCH_version)

def format_cuda_version(version):
  """Build the compute-platform tag used in the PyG wheel index URL.

  Args:
    version: the value of ``torch.version.cuda`` -- a dotted version string
      such as ``'11.3'``, or ``None`` when PyTorch was built without CUDA.

  Returns:
    ``'cu'`` followed by the version with its dots removed (``'cu113'``), or
    ``'cpu'`` when ``version`` is ``None``.
  """
  if version is None:
    # CPU-only builds report no CUDA version; calling .replace on None would
    # raise AttributeError. The wheel index names that platform 'cpu'.
    return 'cpu'
  return 'cu' + version.replace('.', '')

CUDA_version = torch.version.cuda
CUDA = format_cuda_version(CUDA_version)

!pip install torch-scatter -f https://data.pyg.org/whl/torch-{TORCH}+{CUDA}.html
!pip install torch-sparse -f https://data.pyg.org/whl/torch-{TORCH}+{CUDA}.html
!pip install torch-cluster -f https://data.pyg.org/whl/torch-{TORCH}+{CUDA}.html
!pip install torch-spline-conv -f https://data.pyg.org/whl/torch-{TORCH}+{CUDA}.html
!pip install torch-geometric

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in links: https://data.pyg.org/whl/torch-1.12.1+cu113.html
Collecting torch-scatter
  Downloading https://data.pyg.org/whl/torch-1.12.0%2Bcu113/torch_scatter-2.0.9-cp37-cp37m-linux_x86_64.whl (7.9 MB)
[K     |████████████████████████████████| 7.9 MB 5.0 MB/s 
[?25hInstalling collected packages: torch-scatter
Successfully installed torch-scatter-2.0.9
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in links: https://data.pyg.org/whl/torch-1.12.1+cu113.html
Collecting torch-sparse
  Downloading https://data.pyg.org/whl/torch-1.12.0%2Bcu113/torch_sparse-0.6.15-cp37-cp37m-linux_x86_64.whl (3.5 MB)
[K     |████████████████████████████████| 3.5 MB 4.8 MB/s 
Installing collected packages: torch-sparse
Successfully installed torch-sparse-0.6.15
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/

In [2]:
!pip install ogb
!pip install umap-learn

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting ogb
  Downloading ogb-1.3.5-py3-none-any.whl (78 kB)
[K     |████████████████████████████████| 78 kB 3.7 MB/s 
Collecting outdated>=0.2.0
  Downloading outdated-0.2.2-py2.py3-none-any.whl (7.5 kB)
Collecting littleutils
  Downloading littleutils-0.2.2.tar.gz (6.6 kB)
Building wheels for collected packages: littleutils
  Building wheel for littleutils (setup.py) ... [?25l[?25hdone
  Created wheel for littleutils: filename=littleutils-0.2.2-py3-none-any.whl size=7047 sha256=6b3c7c3ea9c810af32c0e1f6fb208a9810ab92544e51e255f6475324f0ad07db
  Stored in directory: /root/.cache/pip/wheels/d6/64/cd/32819b511a488e4993f2fab909a95330289c3f4e0f6ef4676d
Successfully built littleutils
Installing collected packages: littleutils, outdated, ogb
Successfully installed littleutils-0.2.2 ogb-1.3.5 outdated-0.2.2
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels

In [3]:
from typing import Callable, Optional
from torch_geometric.typing import Adj, OptTensor

import torch
from torch import Tensor
import torch.nn.functional as F
from torch_sparse import SparseTensor, matmul
from torch_geometric.nn.conv import MessagePassing
from torch_geometric.nn.conv.gcn_conv import gcn_norm

import torch.nn as nn
from torch.nn import ModuleList, Linear, BatchNorm1d
import torch.optim as optim
import numpy as np

from torch_geometric.nn import GCNConv, GATConv, SAGEConv, JumpingKnowledge
from torch_geometric.data import NeighborSampler
import torch_geometric.transforms as T

from ogb.nodeproppred import PygNodePropPredDataset, Evaluator
from tqdm import tqdm

from torch_geometric.utils import to_undirected
from torch_sparse import SparseTensor

In [4]:
class Logger(object):
    """Collects (train, valid, test) score triples per run and prints summaries.

    Scores are stored as given and multiplied by 100 when printed.

    Args:
        runs: number of independent runs to allocate result lists for.
        info: arbitrary payload kept on the instance; not used by the class.
    """

    def __init__(self, runs, info=None):
        self.info = info
        self.results = [[] for _ in range(runs)]

    def add_result(self, run, result):
        """Append one ``(train, valid, test)`` triple to run ``run`` (0-based)."""
        assert len(result) == 3
        assert run >= 0 and run < len(self.results)
        self.results[run].append(result)

    def print_statistics(self, run=None, print_all=True):
        """Print a summary for one run, or mean ± std over all runs.

        "Final" values are the ones recorded at the entry with the highest
        validation score.

        Args:
            run: 0-based run index; ``None`` summarises every run.
            print_all: when summarising all runs, also print the train and
                validation lines (the test line is always printed).
        """
        if run is not None:
            result = 100 * torch.tensor(self.results[run])
            argmax = result[:, 1].argmax().item()
            print(f'Run {run + 1:02d}:', flush=True)
            print(f'Highest Train: {result[:, 0].max():.2f}', flush=True)
            print(f'Highest Valid: {result[:, 1].max():.2f}', flush=True)
            print(f'  Final Train: {result[argmax, 0]:.2f}', flush=True)
            print(f'   Final Test: {result[argmax, 2]:.2f}', flush=True)
        else:
            best_results = []
            # Build one tensor per run instead of a single tensor over all
            # runs: runs may hold different numbers of entries (or none), and
            # a ragged nested list cannot be converted in one call.
            for run_results in self.results:
                if not run_results:
                    continue  # nothing recorded for this run yet
                r = 100 * torch.tensor(run_results)
                best_row = r[:, 1].argmax()
                train1 = r[:, 0].max().item()
                valid = r[:, 1].max().item()
                train2 = r[best_row, 0].item()
                test = r[best_row, 2].item()
                best_results.append((train1, valid, train2, test))

            print('All runs:', flush=True)
            if not best_results:
                print('No results recorded.', flush=True)
                return

            best_result = torch.tensor(best_results)

            if print_all:
                r = best_result[:, 0]
                print(f'Highest Train: {r.mean():.2f} ± {r.std():.2f}', flush=True)
                r = best_result[:, 1]
                print(f'Highest Valid: {r.mean():.2f} ± {r.std():.2f}', flush=True)
                r = best_result[:, 2]
                print(f'  Final Train: {r.mean():.2f} ± {r.std():.2f}', flush=True)
            r = best_result[:, 3]
            print(f'   Final Test: {r.mean():.2f} ± {r.std():.2f}', flush=True)

In [5]:
class LabelPropagation(MessagePassing):
    """Iterative label propagation implemented as a message-passing layer.

    Every one of the ``num_layers`` iterations propagates the current signal
    over the graph, mixes it with the initial signal as
    ``alpha * propagated + (1 - alpha) * initial`` and applies ``post_step``.
    """

    def __init__(self, num_layers: int, alpha: float):
        super().__init__(aggr='add')
        self.alpha = alpha
        self.num_layers = num_layers

    @torch.no_grad()
    def forward(
            self, y: Tensor, edge_index: Adj, mask: Optional[Tensor] = None,
            edge_weight: OptTensor = None,
            post_step: Callable = lambda y: y.clamp_(0., 1.)
    ) -> Tensor:
        """Propagate ``y`` over the graph and return the resulting signal.

        Args:
            y: signal to propagate; a ``torch.long`` tensor is flattened and
                one-hot encoded to float first.
            edge_index: graph connectivity. A ``SparseTensor`` without values,
                or a plain ``Tensor`` given without ``edge_weight``, is
                normalised with ``gcn_norm(add_self_loops=False)``.
            mask: optional selector; when given, only the rows of ``y`` it
                selects are kept and every other row starts at zero.
            edge_weight: optional weights for a ``Tensor`` ``edge_index``.
            post_step: callable applied to the signal after every iteration;
                the default clamps it to ``[0, 1]`` in place.
        """
        if y.dtype == torch.long:
            y = F.one_hot(y.view(-1)).to(torch.float)

        if mask is None:
            signal = y
        else:
            signal = torch.zeros_like(y)
            signal[mask] = y[mask]

        unweighted_sparse = (isinstance(edge_index, SparseTensor)
                             and not edge_index.has_value())
        if unweighted_sparse:
            edge_index = gcn_norm(edge_index, add_self_loops=False)
        elif isinstance(edge_index, Tensor) and edge_weight is None:
            edge_index, edge_weight = gcn_norm(edge_index,
                                               num_nodes=y.size(0),
                                               add_self_loops=False)

        # Fixed share of the initial signal that is re-added every iteration.
        anchor = (1 - self.alpha) * signal
        for _ in range(self.num_layers):
            # NOTE(review): the annotation below names `y` while the call
            # passes `x=`; confirm before relying on it for TorchScript.
            # propagate_type: (y: Tensor, edge_weight: OptTensor)
            signal = self.propagate(edge_index, x=signal,
                                    edge_weight=edge_weight, size=None)
            signal = post_step(signal.mul_(self.alpha).add_(anchor))

        return signal

    def message(self, x_j: Tensor, edge_weight: OptTensor) -> Tensor:
        """Return the neighbour signal, scaled per edge when weights exist."""
        if edge_weight is None:
            return x_j
        return edge_weight.view(-1, 1) * x_j

    def message_and_aggregate(self, adj_t: SparseTensor, x: Tensor) -> Tensor:
        """Fused message + aggregation as one sparse matmul of ``adj_t`` and ``x``."""
        return matmul(adj_t, x, reduce=self.aggr)

    def __repr__(self):
        return (f'{self.__class__.__name__}('
                f'num_layers={self.num_layers}, alpha={self.alpha})')

In [6]:
class CorrectAndSmooth(torch.nn.Module):
    """Correct & Smooth post-processing built from two LabelPropagation modules.

    ``correct`` propagates the residual error measured on the labelled nodes
    and adds it back to the soft predictions; ``smooth`` overwrites the
    labelled rows with the ground truth and propagates the result.

    Args:
        num_correction_layers: iterations of the error propagation.
        correction_alpha: mixing coefficient of the error propagation.
        num_smoothing_layers: iterations of the label smoothing propagation.
        smoothing_alpha: mixing coefficient of the smoothing propagation.
        autoscale: if True, scale the propagated error per node from the
            labelled-node error magnitude; otherwise use the fixed ``scale``.
        scale: multiplier for the propagated error when ``autoscale`` is False.
    """

    def __init__(self, num_correction_layers: int, correction_alpha: float,
                 num_smoothing_layers: int, smoothing_alpha: float,
                 autoscale: bool = True, scale: float = 1.0):
        super(CorrectAndSmooth, self).__init__()
        self.autoscale = autoscale
        self.scale = scale

        self.prop1 = LabelPropagation(num_correction_layers, correction_alpha)
        self.prop2 = LabelPropagation(num_smoothing_layers, smoothing_alpha)

    def correct(self, y_soft: Tensor, y_true: Tensor, mask: Tensor,
                edge_index: Adj, edge_weight: OptTensor = None) -> Tensor:
        """Return ``y_soft`` plus the propagated, scaled training error.

        Args:
            y_soft: soft predictions whose rows sum to (approximately) 1;
                this is asserted on the average row sum.
            y_true: ground truth for the nodes selected by ``mask``; a
                ``torch.long`` tensor is one-hot encoded to ``y_soft``'s width.
            mask: boolean mask or index tensor selecting the labelled nodes.
            edge_index: graph connectivity handed to the propagation module.
            edge_weight: optional edge weights handed to the propagation.

        Returns:
            A new tensor; ``y_soft`` itself is not modified.
        """
        assert abs((float(y_soft.sum()) / y_soft.size(0)) - 1.0) < 1e-2

        numel = int(mask.sum()) if mask.dtype == torch.bool else mask.size(0)
        assert y_true.size(0) == numel

        if y_true.dtype == torch.long:
            y_true = F.one_hot(y_true.view(-1), y_soft.size(-1))
            y_true = y_true.to(y_soft.dtype)

        # Residual is known only on labelled nodes; all other rows start at 0.
        error = torch.zeros_like(y_soft)
        error[mask] = y_true - y_soft[mask]

        if self.autoscale:
            smoothed_error = self.prop1(error, edge_index,
                                        edge_weight=edge_weight,
                                        post_step=lambda x: x.clamp_(-1., 1.))

            sigma = error[mask].abs().sum() / numel
            scale = sigma / smoothed_error.abs().sum(dim=1, keepdim=True)
            # Rows with no propagated error divide by zero. That yields inf,
            # or NaN when sigma is 0 as well; reset every non-finite or very
            # large factor so NaN cannot leak into the predictions.
            scale[~torch.isfinite(scale) | (scale > 1000)] = 1.0
            return y_soft + scale * smoothed_error
        else:

            def fix_input(x):
                # Pin the labelled rows to their known error after each step.
                x[mask] = error[mask]
                return x

            smoothed_error = self.prop1(error, edge_index,
                                        edge_weight=edge_weight,
                                        post_step=fix_input)
            return y_soft + self.scale * smoothed_error

    def smooth(self, y_soft: Tensor, y_true: Tensor, mask: Tensor,
               edge_index: Adj, edge_weight: OptTensor = None) -> Tensor:
        """Overwrite the labelled rows with the truth and propagate the result.

        Arguments have the same meaning as in ``correct``. The caller's
        ``y_soft`` tensor is left untouched; the overwrite happens on a copy.
        """
        numel = int(mask.sum()) if mask.dtype == torch.bool else mask.size(0)
        assert y_true.size(0) == numel

        if y_true.dtype == torch.long:
            y_true = F.one_hot(y_true.view(-1), y_soft.size(-1))
            y_true = y_true.to(y_soft.dtype)

        # Copy first: assigning through `mask` would otherwise overwrite the
        # tensor the caller passed in.
        y_soft = y_soft.clone()
        y_soft[mask] = y_true

        return self.prop2(y_soft, edge_index, edge_weight=edge_weight)

    def __repr__(self):
        L1, alpha1 = self.prop1.num_layers, self.prop1.alpha
        L2, alpha2 = self.prop2.num_layers, self.prop2.alpha
        return (f'{self.__class__.__name__}(\n'
                f'    correct: num_layers={L1}, alpha={alpha1}\n'
                f'    smooth:  num_layers={L2}, alpha={alpha2}\n'
                f'    autoscale={self.autoscale}, scale={self.scale}\n'
                ')')

In [7]:
import argparse
import torch
from torch_geometric.nn import Node2Vec
from ogb.nodeproppred import PygNodePropPredDataset

def save_embedding(model, path='embedding.pt'):
    """Write the model's embedding matrix to disk as a CPU tensor.

    Args:
        model: object exposing ``embedding.weight`` (here the Node2Vec model).
        path: destination handed to ``torch.save``. Defaults to
            ``'embedding.pt'``, the file name that used to be hard-coded, so
            existing ``save_embedding(model)`` calls behave as before.
    """
    torch.save(model.embedding.weight.data.cpu(), path)

In [8]:
# device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 
# num_layers = 2
# dropout = 0.5
# epochs = 1
# batch_size = 128
# walk_length = 20
# lr = 0.01
# log_steps = 100
# walks_per_node = 1
# context_size = 10
# embedding_dim = 64 

In [9]:
# Settings for the Node2Vec pre-training stage. The training cell reads
# embedding_dim, walk_length, context_size, walks_per_node, batch_size, lr,
# epochs and log_steps from here.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 
# NOTE(review): num_layers and dropout are not referenced by any visible cell
# (SAGE is later built with literal values) -- confirm before removing them.
num_layers = 3
dropout = 0.5
epochs = 1
batch_size = 256
walk_length = 40
lr = 0.01
log_steps = 100
walks_per_node = 10
context_size = 20
embedding_dim = 128

# Load ogbn-products with ./products/ as the data root and take the graph at
# index 0. In the recorded run this downloaded 1.38 GB after a confirmation
# prompt.
dataset = PygNodePropPredDataset(name='ogbn-products', root='./products/')
data = dataset[0]

This will download 1.38GB. Will you proceed? (y/N)
y
Downloading http://snap.stanford.edu/ogb/data/nodeproppred/products.zip


Downloaded 1.38 GB: 100%|██████████| 1414/1414 [01:24<00:00, 16.79it/s]


Extracting ./products/products.zip


Processing...


Loading necessary files...
This might take a while.
Processing graphs...


100%|██████████| 1/1 [00:01<00:00,  1.33s/it]


Converting graphs into PyG objects...


100%|██████████| 1/1 [00:00<00:00, 1115.21it/s]


Saving...


Done!


In [10]:
# Train Node2Vec embeddings on the product graph.
# NOTE: `model` and `optimizer` are rebound later in the notebook (to the
# SAGE classifier and an Adam optimizer), so this cell must run first.
model = Node2Vec(data.edge_index, embedding_dim, walk_length,
                     context_size, walks_per_node,
                     sparse=True).to(device)

# The model supplies its own loader; each batch is a (pos_rw, neg_rw) pair.
loader = model.loader(batch_size=batch_size, shuffle=True,
                          num_workers=4)
# sparse=True above is paired with SparseAdam here.
optimizer = torch.optim.SparseAdam(list(model.parameters()), lr=lr)

model.train()
for epoch in range(1, epochs + 1):
    for i, (pos_rw, neg_rw) in enumerate(loader):
      optimizer.zero_grad()
      loss = model.loss(pos_rw.to(device), neg_rw.to(device))
      loss.backward()    
      optimizer.step()

      # Report the loss of the current batch every `log_steps` batches.
      if (i + 1) % log_steps == 0:
        print(f'Epoch: {epoch:02d}, Step: {i+1:03d}/{len(loader)}, 'f'Loss: {loss:.4f}')

      # Checkpoint the embedding every 100 batches. The interval is a literal
      # and does not follow `log_steps`.
      if (i + 1) % 100 == 0:  
        save_embedding(model)
    # Save once more at the end of every epoch so the file holds the final
    # weights.
    save_embedding(model)

Epoch: 01, Step: 100/9567, Loss: 9.3861
Epoch: 01, Step: 200/9567, Loss: 8.5542
Epoch: 01, Step: 300/9567, Loss: 7.7124
Epoch: 01, Step: 400/9567, Loss: 6.8442
Epoch: 01, Step: 500/9567, Loss: 6.1089
Epoch: 01, Step: 600/9567, Loss: 5.4126
Epoch: 01, Step: 700/9567, Loss: 4.7310
Epoch: 01, Step: 800/9567, Loss: 4.2415
Epoch: 01, Step: 900/9567, Loss: 3.7483
Epoch: 01, Step: 1000/9567, Loss: 3.3870
Epoch: 01, Step: 1100/9567, Loss: 3.0267
Epoch: 01, Step: 1200/9567, Loss: 2.7368
Epoch: 01, Step: 1300/9567, Loss: 2.4965
Epoch: 01, Step: 1400/9567, Loss: 2.3073
Epoch: 01, Step: 1500/9567, Loss: 2.1435
Epoch: 01, Step: 1600/9567, Loss: 1.9940
Epoch: 01, Step: 1700/9567, Loss: 1.9041
Epoch: 01, Step: 1800/9567, Loss: 1.7844
Epoch: 01, Step: 1900/9567, Loss: 1.6951
Epoch: 01, Step: 2000/9567, Loss: 1.6243
Epoch: 01, Step: 2100/9567, Loss: 1.5495
Epoch: 01, Step: 2200/9567, Loss: 1.4972
Epoch: 01, Step: 2300/9567, Loss: 1.4368
Epoch: 01, Step: 2400/9567, Loss: 1.3817
Epoch: 01, Step: 2500/956

## Dataset

In [11]:
dataset = PygNodePropPredDataset(name='ogbn-products', root='./products/')
print(dataset, flush=True)

PygNodePropPredDataset()


In [12]:
data = dataset[0]
print(data, flush=True)

Data(num_nodes=2449029, edge_index=[2, 123718280], x=[2449029, 100], y=[2449029, 1])


In [13]:
# split_idx contains a dictionary of train, validation and test node indices
split_idx = dataset.get_idx_split()
# predefined ogb evaluator method used for validation of predictions
evaluator = Evaluator(name='ogbn-products')

In [14]:
# Let's check how the node ids are distributed across the train, validation and test splits
print('Number of training nodes:', split_idx['train'].size(0))
print('Number of validation nodes:', split_idx['valid'].size(0))
print('Number of test nodes:', split_idx['test'].size(0))

Number of training nodes: 196615
Number of validation nodes: 39323
Number of test nodes: 2213091


In [15]:
# Let's check some graph statistics of the ogbn-products graph
print("Number of nodes in the graph:", data.num_nodes)
print("Number of edges in the graph:", data.num_edges)
print("---------------------------------------------")
print("Node feature matrix with shape:", data.x.shape) # [num_nodes, num_node_features]
print("Graph connectivity in COO format with shape:", data.edge_index.shape) # [2, num_edges]
print("Target to train against :", data.y.shape) 
print("Node feature length", dataset.num_features)

Number of nodes in the graph: 2449029
Number of edges in the graph: 123718280
---------------------------------------------
Node feature matrix with shape: torch.Size([2449029, 100])
Graph connectivity in COO format with shape: torch.Size([2, 123718280])
Target to train against : torch.Size([2449029, 1])
Node feature length 100


In [16]:
train_idx = split_idx['train']
test_idx = split_idx['test']

In [17]:
# Mini-batch loader over the training nodes: shuffled batches of 1024 seed
# nodes. `sizes` has one entry per SAGE layer -- presumably the number of
# neighbours sampled per hop; confirm against the NeighborSampler docs.
# Built from the CPU-resident `train_idx`, before anything is moved to the GPU.
train_loader = NeighborSampler(data.edge_index, node_idx=train_idx,
                               sizes=[15, 10, 5], batch_size=1024,
                               shuffle=True, num_workers=12)



In [18]:
# Unshuffled loader used by SAGE.inference, which consumes it once per layer.
# node_idx=None with sizes=[-1] presumably means "every node, with its full
# one-hop neighbourhood" -- confirm against the NeighborSampler docs.
subgraph_loader = NeighborSampler(data.edge_index, node_idx=None, sizes=[-1],
                                  batch_size=1024, shuffle=False,
                                  num_workers=12)

In [19]:
class SAGE(torch.nn.Module):
    """GraphSAGE node classifier trained on sampled neighbourhoods.

    The stack is ``num_layers`` SAGEConv layers; every layer except the last
    is followed by batch norm, ReLU and dropout.

    Args:
        dataset: object providing ``num_classes`` (width of the last layer).
        in_channels: size of the input node features.
        hidden_channels: width of every hidden layer.
        num_layers: total number of SAGEConv layers; must be at least 2.
        dropout: dropout probability applied after each hidden layer. Defaults
            to 0.5, the value that used to be hard-coded in ``forward``.

    Raises:
        ValueError: if ``num_layers`` is smaller than 2. The constructor always
            creates an input and an output layer, so a smaller value would
            silently produce a stack that ``forward`` cannot use correctly.
    """

    def __init__(self, dataset, in_channels, hidden_channels, num_layers=3,
                 dropout=0.5):
        super(SAGE, self).__init__()

        if num_layers < 2:
            raise ValueError(
                f'num_layers must be at least 2, got {num_layers}')

        self.num_layers = num_layers
        self.dropout = dropout
        self.convs = nn.ModuleList()
        self.bns = nn.ModuleList()

        self.convs.append(SAGEConv(in_channels, hidden_channels))
        self.bns.append(nn.BatchNorm1d(hidden_channels))

        for _ in range(self.num_layers - 2):
            self.convs.append(SAGEConv(hidden_channels, hidden_channels))
            self.bns.append(nn.BatchNorm1d(hidden_channels))

        # No batch norm after the output layer.
        self.convs.append(SAGEConv(hidden_channels, dataset.num_classes))

    def reset_parameters(self):
        """Re-initialise every convolution and batch-norm layer."""
        for conv in self.convs:
            conv.reset_parameters()
        for bn in self.bns:
            bn.reset_parameters()

    def forward(self, x, adjs):
        """Return log-probabilities for the target nodes of a sampled batch.

        Args:
            x: features of all nodes sampled for this batch.
            adjs: one ``(edge_index, e_id, size)`` triple per layer.
        """
        # `train_loader` computes the k-hop neighborhood of a batch of nodes,
        # and returns, for each layer, a bipartite graph object, holding the
        # bipartite edges `edge_index`, the index `e_id` of the original edges,
        # and the size/shape `size` of the bipartite graph.
        # Target nodes are also included in the source nodes so that one can
        # easily apply skip-connections or add self-loops.
        for i, (adj_t, _, size) in enumerate(adjs):
            x_target = x[:size[1]]  # Target nodes are always placed first.
            x = self.convs[i]((x, x_target), adj_t)
            if i != self.num_layers - 1:
                x = self.bns[i](x)
                x = F.relu(x)
                x = F.dropout(x, p=self.dropout, training=self.training)
        return x.log_softmax(dim=-1)

    @torch.no_grad()
    def inference(self, x_all, loader=None):
        """Compute final-layer outputs for every node, one layer at a time.

        Args:
            x_all: input features of all nodes.
            loader: iterable yielding ``(batch_size, n_id, adj)`` batches that
                together cover all nodes. When omitted, the notebook-level
                ``subgraph_loader`` is used, as before.

        Returns:
            A CPU tensor holding the raw output of the last layer for all
            nodes; no ``log_softmax`` is applied.
        """
        if loader is None:
            loader = subgraph_loader

        # Run on the device that holds the weights rather than guessing from
        # CUDA availability, so a CPU-resident model works on a GPU machine.
        device = next(self.parameters()).device

        # Compute representations of nodes layer by layer, using *all*
        # available edges. This leads to faster computation in contrast to
        # immediately computing the final representations of each batch.
        with tqdm(total=x_all.size(0) * self.num_layers) as pbar:
            pbar.set_description('Evaluating')

            for i in range(self.num_layers):
                xs = []
                for batch_size, n_id, adj in loader:
                    adj_t, _, size = adj.to(device)
                    x = x_all[n_id].to(device)
                    x_target = x[:size[1]]
                    x = self.convs[i]((x, x_target), adj_t)

                    if i != self.num_layers - 1:
                        x = self.bns[i](x)
                        x = F.relu(x)
                    xs.append(x.cpu())

                    pbar.update(batch_size)

                # Outputs of this layer become the inputs of the next one.
                x_all = torch.cat(xs, dim=0)

        return x_all

In [20]:
# Build the classifier input: the raw node features followed by the Node2Vec
# embedding read back from 'embedding.pt' (the file save_embedding writes).
x = data.x
# map_location='cpu' loads the tensor onto the CPU regardless of where it
# was saved from.
embedding = torch.load('embedding.pt', map_location='cpu')
# Concatenate along the last (feature) dimension; the printed model shows the
# resulting input width as 228.
x = torch.cat([x, embedding], dim=-1)

In [21]:
model = SAGE(dataset=dataset, in_channels=x.size(-1), hidden_channels=128, num_layers=3)
print(model, flush=True)

SAGE(
  (convs): ModuleList(
    (0): SAGEConv(228, 128, aggr=mean)
    (1): SAGEConv(128, 128, aggr=mean)
    (2): SAGEConv(128, 47, aggr=mean)
  )
  (bns): ModuleList(
    (0): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (1): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
)


In [22]:
# Pick the GPU when one is available and move the SAGE model and the graph
# data object onto it.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device, flush=True)
model.to(device)
# Rebinds `data` to the result of the move.
data = data.to(device)

cuda


In [23]:
# Move the input features, the labels (trailing dimension squeezed away) and
# the split indices onto `device`.
x = x.to(device)
y = data.y.squeeze().to(device)
train_idx = split_idx['train'].to(device)
# NOTE(review): val_idx, test_idx and x_train are not referenced by the
# visible cells that follow (test() indexes with split_idx directly) --
# confirm before removing them.
val_idx = split_idx['valid'].to(device)
test_idx = split_idx['test'].to(device)
# y_train / train_idx are what the Correct & Smooth step receives later on.
x_train, y_train = x[train_idx], y[train_idx]

In [24]:
def process_adj(data):
    """Build the symmetrically normalised adjacency with a filled diagonal.

    The connectivity is made undirected, stored as an ``N x N`` SparseTensor,
    given diagonal entries via ``set_diag()`` and scaled on both sides by
    ``deg ** -0.5`` (row scaling on the left, column scaling on the right).

    Args:
        data: graph object providing ``num_nodes`` and ``edge_index``. It is
            only read; ``data.edge_index`` is no longer overwritten with the
            undirected edge list as a side effect.

    Returns:
        The normalised adjacency as a SparseTensor.
    """
    N = data.num_nodes
    # Keep the undirected edges in a local variable so the caller's graph
    # object is left exactly as it was passed in.
    edge_index = to_undirected(data.edge_index, N)

    row, col = edge_index

    adj = SparseTensor(row=row, col=col, sparse_sizes=(N, N))
    adj = adj.set_diag()
    deg = adj.sum(dim=1).to(torch.float)
    deg_inv_sqrt = deg.pow(-0.5)
    # A zero degree gives inf under pow(-0.5); zero those scaling factors.
    deg_inv_sqrt[deg_inv_sqrt == float('inf')] = 0
    adj = deg_inv_sqrt.view(-1, 1) * adj * deg_inv_sqrt.view(1, -1)
    return adj

In [25]:
DAD = process_adj(data).to(device)

In [26]:
# NOTE: `criterion` is not referenced by train() in this notebook, which
# calls F.nll_loss directly.
criterion = nn.NLLLoss().to(device)
# Adam over the SAGE parameters. This rebinds `optimizer`, which previously
# referred to the SparseAdam instance used for Node2Vec.
optimizer = optim.Adam(model.parameters(), lr=0.003)

In [27]:
def train():
    """Run one training epoch over ``train_loader``.

    Relies on the notebook globals ``model``, ``optimizer``, ``x``, ``y``,
    ``train_idx``, ``train_loader``, ``device`` and the loop variable
    ``epoch`` (used only for the progress-bar label).

    Returns:
        ``(loss, approx_acc)``: the mean loss per batch and the fraction of
        training nodes predicted correctly while training was in progress.
    """
    model.train()

    progress = tqdm(total=train_idx.size(0))
    progress.set_description(f'Epoch {epoch:02d}')

    loss_sum = 0
    num_correct = 0
    for batch_size, n_id, adjs in train_loader:
        # `adjs` holds a list of `(edge_index, e_id, size)` tuples.
        adjs = [adj.to(device) for adj in adjs]
        # The first `batch_size` sampled ids are the seed (target) nodes.
        targets = y[n_id[:batch_size]]

        out = model(x[n_id], adjs)
        loss = F.nll_loss(out, targets)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        loss_sum += float(loss)
        num_correct += int(out.argmax(dim=-1).eq(targets).sum())
        progress.update(batch_size)

    progress.close()

    mean_loss = loss_sum / len(train_loader)
    approx_acc = num_correct / int(train_idx.size(0))

    return mean_loss, approx_acc

In [28]:
@torch.no_grad()
def test(out=None):
    """Score predictions on the train, validation and test splits.

    Args:
        out: per-node class scores. When omitted they are computed with
            ``model.inference(x)``.

    Returns:
        ``(train_acc, valid_acc, test_acc, out)``, where ``out`` is the score
        tensor that was evaluated.
    """
    model.eval()

    if out is None:
        out = model.inference(x)
    y_pred = out.argmax(dim=-1, keepdim=True)

    def split_accuracy(split):
        # Accuracy of `y_pred` on one named split, as reported by `evaluator`.
        idx = split_idx[split]
        return evaluator.eval({
            'y_true': data.y[idx],
            'y_pred': y_pred[idx],
        })['acc']

    train_acc = split_accuracy('train')
    valid_acc = split_accuracy('valid')
    test_acc = split_accuracy('test')

    return train_acc, valid_acc, test_acc, out

In [29]:
# Experiment driver: for every run, train SAGE from scratch, keep the soft
# predictions of the best-validation epoch, apply Correct & Smooth to them
# and log the post-processed accuracies.
if __name__ == '__main__':
    runs = 2
    logger = Logger(runs)

    for run in range(runs):
        print(sum(p.numel() for p in model.parameters()), flush=True)

        print('', flush=True)
        print(f'Run {run + 1:02d}:', flush=True)
        print('', flush=True)

        # Start every run from scratch. Resetting the weights alone is not
        # enough: Adam keeps per-parameter moment estimates and step counts,
        # so reusing the optimizer would leak state from the previous run.
        # train() reads the notebook-level `optimizer`, which is rebound here.
        model.reset_parameters()
        optimizer = optim.Adam(model.parameters(), lr=0.003)

        best_val_acc = 0

        # `epoch` is also read by train() for its progress-bar label.
        for epoch in range(15):
            loss, acc = train()
            print(f'Epoch {epoch:02d}, Loss: {loss:.4f}, Approx. Train: {acc:.4f}', flush=True)

            # Full-graph evaluation is expensive, so it only starts once the
            # first five epochs are done.
            if (epoch + 1) > 5:

                train_acc, val_acc, test_acc, out = test()
                result = (train_acc, val_acc, test_acc)
                if val_acc > best_val_acc:
                    best_val_acc = val_acc
                    # Soft predictions of the best epoch so far feed the
                    # Correct & Smooth step below.
                    y_soft = out.softmax(dim=-1).to(device)

                print(f'Run: {run + 1:02d}, '
                      f'Epoch: {epoch:02d}, '
                      f'Loss: {loss:.4f}, '
                      f'Train: {100 * train_acc:.2f}%, '
                      f'Valid: {100 * val_acc:.2f}% '
                      f'Test: {100 * test_acc:.2f}%', flush=True)

        # Alternative setting kept for reference:
        # post = CorrectAndSmooth(num_correction_layers=50, correction_alpha=1.0,
        #                         num_smoothing_layers=50, smoothing_alpha=0.8,
        #                         autoscale=False, scale=20.)

        post = CorrectAndSmooth(num_correction_layers=100, correction_alpha=0.8,
                                num_smoothing_layers=100, smoothing_alpha=0.8,
                                autoscale=False, scale=10.)

        print('Correct and smooth...', flush=True)

        y_soft = post.correct(y_soft, y_train, train_idx, DAD)
        y_soft = post.smooth(y_soft, y_train, train_idx, DAD)
        print('Done!', flush=True)

        train_acc, val_acc, test_acc, _ = test(y_soft)
        print(f'Train: {train_acc:.4f}, Val: {val_acc:.4f}, Test: {test_acc:.4f}', flush=True)

        # Only the post-processed scores are logged, one entry per run.
        result = (train_acc, val_acc, test_acc)
        logger.add_result(run, result)

    logger.print_statistics()

103983

Run 01:



Epoch 00: 100%|██████████| 196615/196615 [00:49<00:00, 3944.90it/s]

Epoch 00, Loss: 0.5291, Approx. Train: 0.8661



Epoch 01: 100%|██████████| 196615/196615 [00:48<00:00, 4033.03it/s]

Epoch 01, Loss: 0.3474, Approx. Train: 0.9026



Epoch 02: 100%|██████████| 196615/196615 [00:49<00:00, 4007.93it/s]

Epoch 02, Loss: 0.3207, Approx. Train: 0.9088



Epoch 03: 100%|██████████| 196615/196615 [00:48<00:00, 4057.23it/s]

Epoch 03, Loss: 0.3101, Approx. Train: 0.9125



Epoch 04: 100%|██████████| 196615/196615 [00:48<00:00, 4040.44it/s]

Epoch 04, Loss: 0.3004, Approx. Train: 0.9139



Epoch 05: 100%|██████████| 196615/196615 [00:48<00:00, 4085.26it/s]

Epoch 05, Loss: 0.2929, Approx. Train: 0.9159



Evaluating: 100%|██████████| 7347087/7347087 [01:40<00:00, 73421.10it/s]


Run: 01, Epoch: 05, Loss: 0.2929, Train: 92.67%, Valid: 91.83% Test: 80.08%


Epoch 06: 100%|██████████| 196615/196615 [00:49<00:00, 3999.21it/s]

Epoch 06, Loss: 0.3040, Approx. Train: 0.9168



Evaluating: 100%|██████████| 7347087/7347087 [01:35<00:00, 77168.72it/s]

Run: 01, Epoch: 06, Loss: 0.3040, Train: 92.58%, Valid: 91.66% Test: 79.99%



Epoch 07: 100%|██████████| 196615/196615 [00:49<00:00, 3973.16it/s]

Epoch 07, Loss: 0.3066, Approx. Train: 0.9124



Evaluating: 100%|██████████| 7347087/7347087 [01:32<00:00, 79075.64it/s]

Run: 01, Epoch: 07, Loss: 0.3066, Train: 92.63%, Valid: 91.77% Test: 79.96%



Epoch 08: 100%|██████████| 196615/196615 [00:49<00:00, 3990.93it/s]

Epoch 08, Loss: 0.2842, Approx. Train: 0.9178



Evaluating: 100%|██████████| 7347087/7347087 [01:33<00:00, 78945.11it/s]

Run: 01, Epoch: 08, Loss: 0.2842, Train: 92.77%, Valid: 91.76% Test: 79.68%



Epoch 09: 100%|██████████| 196615/196615 [00:49<00:00, 3986.30it/s]

Epoch 09, Loss: 0.2785, Approx. Train: 0.9197



Evaluating: 100%|██████████| 7347087/7347087 [01:34<00:00, 77605.78it/s]


Run: 01, Epoch: 09, Loss: 0.2785, Train: 93.06%, Valid: 92.01% Test: 79.68%


Epoch 10: 100%|██████████| 196615/196615 [00:49<00:00, 4000.31it/s]

Epoch 10, Loss: 0.2807, Approx. Train: 0.9193



Evaluating: 100%|██████████| 7347087/7347087 [01:34<00:00, 77905.37it/s]

Run: 01, Epoch: 10, Loss: 0.2807, Train: 93.12%, Valid: 91.97% Test: 79.18%



Epoch 11: 100%|██████████| 196615/196615 [00:49<00:00, 4007.72it/s]

Epoch 11, Loss: 0.2708, Approx. Train: 0.9210



Evaluating: 100%|██████████| 7347087/7347087 [01:35<00:00, 76928.28it/s]

Run: 01, Epoch: 11, Loss: 0.2708, Train: 93.21%, Valid: 91.97% Test: 80.21%



Epoch 12: 100%|██████████| 196615/196615 [00:49<00:00, 3984.66it/s]

Epoch 12, Loss: 0.2709, Approx. Train: 0.9221



Evaluating: 100%|██████████| 7347087/7347087 [01:32<00:00, 79489.96it/s]

Run: 01, Epoch: 12, Loss: 0.2709, Train: 93.15%, Valid: 91.82% Test: 80.17%



Epoch 13: 100%|██████████| 196615/196615 [00:49<00:00, 3997.47it/s]

Epoch 13, Loss: 0.2799, Approx. Train: 0.9210



Evaluating: 100%|██████████| 7347087/7347087 [01:35<00:00, 77245.60it/s]


Run: 01, Epoch: 13, Loss: 0.2799, Train: 93.36%, Valid: 92.11% Test: 79.89%


Epoch 14: 100%|██████████| 196615/196615 [00:49<00:00, 3964.77it/s] 

Epoch 14, Loss: 0.2773, Approx. Train: 0.9198



Evaluating: 100%|██████████| 7347087/7347087 [01:33<00:00, 78605.89it/s]

Run: 01, Epoch: 14, Loss: 0.2773, Train: 93.35%, Valid: 91.96% Test: 79.90%
Correct and smooth...





Done!
Train: 0.9701, Val: 0.9225, Test: 0.8152
103983

Run 02:



Epoch 00: 100%|██████████| 196615/196615 [00:49<00:00, 4001.67it/s]

Epoch 00, Loss: 0.5126, Approx. Train: 0.8665



Epoch 01: 100%|██████████| 196615/196615 [00:49<00:00, 3988.38it/s]

Epoch 01, Loss: 0.3532, Approx. Train: 0.9021



Epoch 02: 100%|██████████| 196615/196615 [00:49<00:00, 3976.20it/s]

Epoch 02, Loss: 0.3306, Approx. Train: 0.9065



Epoch 03: 100%|██████████| 196615/196615 [00:48<00:00, 4018.80it/s]

Epoch 03, Loss: 0.3213, Approx. Train: 0.9098



Epoch 04: 100%|██████████| 196615/196615 [00:49<00:00, 4011.25it/s]

Epoch 04, Loss: 0.3115, Approx. Train: 0.9117



Epoch 05: 100%|██████████| 196615/196615 [00:49<00:00, 3997.96it/s]

Epoch 05, Loss: 0.3030, Approx. Train: 0.9127



Evaluating: 100%|██████████| 7347087/7347087 [01:34<00:00, 77688.87it/s]


Run: 02, Epoch: 05, Loss: 0.3030, Train: 92.23%, Valid: 91.49% Test: 79.86%


Epoch 06: 100%|██████████| 196615/196615 [00:48<00:00, 4023.53it/s]

Epoch 06, Loss: 0.2915, Approx. Train: 0.9162



Evaluating: 100%|██████████| 7347087/7347087 [01:32<00:00, 79676.01it/s]

Run: 02, Epoch: 06, Loss: 0.2915, Train: 92.43%, Valid: 91.46% Test: 79.43%



Epoch 07: 100%|██████████| 196615/196615 [00:49<00:00, 4001.74it/s]

Epoch 07, Loss: 0.2929, Approx. Train: 0.9157



Evaluating: 100%|██████████| 7347087/7347087 [01:33<00:00, 78244.32it/s]


Run: 02, Epoch: 07, Loss: 0.2929, Train: 92.66%, Valid: 91.62% Test: 79.88%


Epoch 08: 100%|██████████| 196615/196615 [00:50<00:00, 3930.81it/s]

Epoch 08, Loss: 0.2882, Approx. Train: 0.9189



Evaluating: 100%|██████████| 7347087/7347087 [01:32<00:00, 79216.76it/s]


Run: 02, Epoch: 08, Loss: 0.2882, Train: 92.85%, Valid: 91.82% Test: 79.82%


Epoch 09: 100%|██████████| 196615/196615 [00:49<00:00, 3941.35it/s]

Epoch 09, Loss: 0.2893, Approx. Train: 0.9167



Evaluating: 100%|██████████| 7347087/7347087 [01:34<00:00, 77774.60it/s]

Run: 02, Epoch: 09, Loss: 0.2893, Train: 92.99%, Valid: 91.81% Test: 79.62%



Epoch 10: 100%|██████████| 196615/196615 [00:50<00:00, 3926.63it/s]

Epoch 10, Loss: 0.2808, Approx. Train: 0.9187



Evaluating: 100%|██████████| 7347087/7347087 [01:32<00:00, 79096.56it/s]


Run: 02, Epoch: 10, Loss: 0.2808, Train: 92.99%, Valid: 91.93% Test: 79.63%


Epoch 11: 100%|██████████| 196615/196615 [00:49<00:00, 3945.04it/s]

Epoch 11, Loss: 0.2765, Approx. Train: 0.9204



Evaluating: 100%|██████████| 7347087/7347087 [01:34<00:00, 78057.58it/s]

Run: 02, Epoch: 11, Loss: 0.2765, Train: 93.05%, Valid: 91.85% Test: 79.39%



Epoch 12: 100%|██████████| 196615/196615 [00:49<00:00, 3993.47it/s]

Epoch 12, Loss: 0.2806, Approx. Train: 0.9194



Evaluating: 100%|██████████| 7347087/7347087 [01:32<00:00, 79001.75it/s]


Run: 02, Epoch: 12, Loss: 0.2806, Train: 93.05%, Valid: 91.97% Test: 79.63%


Epoch 13: 100%|██████████| 196615/196615 [00:49<00:00, 3993.35it/s]

Epoch 13, Loss: 0.2801, Approx. Train: 0.9199



Evaluating: 100%|██████████| 7347087/7347087 [01:34<00:00, 77829.45it/s]


Run: 02, Epoch: 13, Loss: 0.2801, Train: 93.24%, Valid: 92.13% Test: 79.13%


Epoch 14: 100%|██████████| 196615/196615 [00:50<00:00, 3917.91it/s]

Epoch 14, Loss: 0.2822, Approx. Train: 0.9179



Evaluating: 100%|██████████| 7347087/7347087 [01:33<00:00, 78757.61it/s]

Run: 02, Epoch: 14, Loss: 0.2822, Train: 93.20%, Valid: 91.92% Test: 79.66%
Correct and smooth...





Done!
Train: 0.9700, Val: 0.9232, Test: 0.8073
All runs:
Highest Train: 97.00 ± 0.01
Highest Valid: 92.29 ± 0.04
  Final Train: 97.00 ± 0.01
   Final Test: 81.13 ± 0.56
