# Text Classification - Training a GNN with PyTorch Geometric


## $\color{blue}{Sections:}$

* Preamble
* Admin
* Data
* Model
* Sampling
* Train - Validate


## $\color{blue}{Preamble:}$

We now train a GNN in PyTorch Geometric. We will keep the network quite close to the previous version, but the switch may improve computational efficiency and potentially make iteration easier.

Note that there are now version issues with SparseTensor: we require a mutually compatible set of versions of torch-sparse, torch, and torch-geometric.

## $\color{blue}{Admin}$
* Install relevant Libraries
* Import relevant Libraries

In [None]:
import torch
import pandas as pd
from google.colab import drive
import numpy as np
# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(device)

cuda


In [None]:
# Mount Google Drive and switch the working directory to it, so the relative
# 'class/...' paths used by later cells resolve.
drive.mount("/content/drive")
%cd '/content/drive/MyDrive'

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
/content/drive/MyDrive


In [None]:
import torch
# Reinstall the PyG companion packages against the running torch build, then
# install torch-geometric itself from the GitHub repository.
# NOTE(review): the recorded output of this cell shows that no matching wheel
# was found for torch-scatter / torch-sparse / torch-cluster at this torch
# version — confirm that the wheel index URL matches the installed torch/CUDA
# build before relying on these packages.
!pip uninstall torch-scatter torch-sparse torch-geometric torch-cluster  --y
!pip install --no-index torch-scatter -f https://data.pyg.org/whl/torch-{torch.__version__}.html
!pip install --no-index torch-sparse -f https://data.pyg.org/whl/torch-{torch.__version__}.html
!pip install --no-index torch-cluster -f https://data.pyg.org/whl/torch-{torch.__version__}.html
!pip install git+https://github.com/pyg-team/pytorch_geometric.git

[0mLooking in links: https://data.pyg.org/whl/torch-2.6.0+cu124.html
[31mERROR: Could not find a version that satisfies the requirement torch-scatter (from versions: none)[0m[31m
[0m[31mERROR: No matching distribution found for torch-scatter[0m[31m
[0mLooking in links: https://data.pyg.org/whl/torch-2.6.0+cu124.html
[31mERROR: Could not find a version that satisfies the requirement torch-sparse (from versions: none)[0m[31m
[0m[31mERROR: No matching distribution found for torch-sparse[0m[31m
[0mLooking in links: https://data.pyg.org/whl/torch-2.6.0+cu124.html
[31mERROR: Could not find a version that satisfies the requirement torch-cluster (from versions: none)[0m[31m
[0m[31mERROR: No matching distribution found for torch-cluster[0m[31m
[0mCollecting git+https://github.com/pyg-team/pytorch_geometric.git
  Cloning https://github.com/pyg-team/pytorch_geometric.git to /tmp/pip-req-build-481aooot
  Running command git clone --filter=blob:none --quiet https://github.c

## $\color{blue}{Data}$

* Connect to Drive
* Load the data
* Load adjacency matrices
* Instantiate PyTorch Geometric Data objects

In [None]:
# Load the pickled train / dev / test DataFrames from the datasets folder.
path = 'class/datasets/'
df_train, df_dev, df_test = (
    pd.read_pickle(path + name) for name in ('df_train', 'df_dev', 'df_test')
)

In [None]:
path = 'class/tensors/adj_{}.pt'

# Every split ships three adjacency tensors: people, locations and entities.

# train
train_people, train_locations, train_entities = (
    torch.load(path.format(name), weights_only=True)
    for name in ('train_people', 'train_locations', 'train_entities')
)

# dev
dev_people, dev_locations, dev_entities = (
    torch.load(path.format(name), weights_only=True)
    for name in ('dev_people', 'dev_locations', 'dev_entities')
)

# val (contains the adjacency matrix for both the training and the development set)
val_people, val_locations, val_entities = (
    torch.load(path.format(name), weights_only=True)
    for name in ('val_people.1', 'val_locations.1', 'val_entities.1')
)

In [None]:
# Keep only the columns needed downstream, then build the validation frame with
# the dev rows first and the train rows after them.
df1, df2 = (
    frame[['index', 'chapter_idx', 'vanilla_embedding.1']]
    for frame in (df_train, df_dev)
)
df_val = pd.concat([df2, df1])

In [None]:
# inputs: one stacked feature tensor and one LongTensor of chapter indices per
# split, all moved to `device`.
H_train, H_dev, H_val = (
    torch.stack(list(frame['vanilla_embedding.1'])).to(device)
    for frame in (df_train, df_dev, df_val)
)
labels_train, labels_dev, labels_val = (
    torch.LongTensor(list(frame['chapter_idx'])).to(device)
    for frame in (df_train, df_dev, df_val)
)

In [None]:
# Turn each entity adjacency tensor into an edge-index tensor. nonzero() gives
# one index tensor per dimension; stacking them means column k describes the
# edge edge_index[0][k] -> edge_index[1][k].
# Only the entity adjacency is used here and no per-edge relation tensor is built.
train_edge_index = torch.stack(train_entities.nonzero(as_tuple=True)).long().to(device)

dev_edge_index = torch.stack(dev_entities.nonzero(as_tuple=True)).long().to(device)

val_edge_index = torch.stack(val_entities.nonzero(as_tuple=True)).long().to(device)

In [None]:
from torch_geometric.data import Data

# Bundle features, connectivity and labels of each split into a PyG Data object.
train_data, dev_data, val_data = (
    Data(x=features, edge_index=edges, y=targets)
    for features, edges, targets in (
        (H_train, train_edge_index, labels_train),
        (H_dev, dev_edge_index, labels_dev),
        (H_val, val_edge_index, labels_val),
    )
)

## $\color{blue}{Model}$


In [None]:
from torch_geometric.utils import degree
from torch_geometric.nn.conv import MessagePassing
from torch import nn
from torch.nn import functional as F

class GNNLayer(MessagePassing):
    """Residual message-passing layer with sum aggregation.

    The forward pass adds a self-projection (``x @ T``) to the aggregated
    neighbour messages (built from ``x @ E``), applies a leaky ReLU, adds the
    input back as a skip connection, then applies batch normalisation and
    dropout.

    Args:
        in_channels: Size of each input node feature vector.
        out_channels: Size of each projected feature vector. Because the
            input is added back onto the result, it must be shape-compatible
            with ``in_channels``.
        dropout: Dropout probability applied to the layer output.
    """

    def __init__(self, in_channels, out_channels, dropout=0.55):
        super().__init__(aggr='add')  # sum the incoming messages per node
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.dropout = dropout

        # T projects a node's own features; E projects the features that are
        # sent along edges as messages.
        self.T = nn.Parameter(torch.empty(in_channels, out_channels))
        self.E = nn.Parameter(torch.empty(in_channels, out_channels))

        self.batch_norm = nn.BatchNorm1d(out_channels)
        self.reset_parameters()

    def reset_parameters(self):
        """Initialise both weight matrices with Xavier-uniform values."""
        for weight in (self.T, self.E):
            nn.init.xavier_uniform_(weight)

    def forward(self, x, edge_index):
        """Return updated node features for ``x`` given ``edge_index``."""
        row, col = edge_index

        # Per-node degree counted over the second row of edge_index; nodes with
        # degree 0 would give inf after the inverse square root, so zero them.
        deg = degree(col, x.size(0), dtype=x.dtype)
        deg_inv_sqrt = deg.pow(-0.5)
        deg_inv_sqrt[deg_inv_sqrt == float('inf')] = 0

        # One coefficient per edge, from the degrees at both of its ends.
        norm = deg_inv_sqrt[row] * deg_inv_sqrt[col]
        # NOTE(review): `norm` is handed to propagate(), but this class defines
        # no message() override, so PyG's default message presumably forwards
        # x_j unscaled and `norm` has no effect — confirm whether that is
        # intended.

        messages = self.propagate(edge_index, x=x @ self.E, norm=norm)
        self_proj = x @ self.T

        # Skip connection around the activated sum of both contributions.
        out = x + F.leaky_relu(self_proj + messages)
        out = self.batch_norm(out)
        return F.dropout(out, p=self.dropout, training=self.training)

In [None]:
class GNNModel(nn.Module):
    """Stack of ``GNNLayer`` blocks followed by a two-layer classifier head.

    Args:
        d: Node feature size; every GNN layer maps ``d`` to ``d``.
        h: Hidden size of the first fully connected layer.
        c: Size of the output (one logit per class).
        num_layers: Number of ``GNNLayer`` blocks to stack.
        dropout_rate: Dropout probability used by the GNN layers and the head.
    """

    def __init__(self, d, h, c, num_layers=2, dropout_rate=0.55):
        super().__init__()
        self.num_layers = num_layers
        stacked = [GNNLayer(d, d, dropout_rate) for _ in range(num_layers)]
        self.gnn_layers = nn.ModuleList(stacked)
        self.fc1 = nn.Linear(d, h)
        self.batch_norm_fc1 = nn.BatchNorm1d(h)
        self.fc2 = nn.Linear(h, c)
        self.dropout = nn.Dropout(dropout_rate)
        self.relu = nn.ReLU()

    def forward(self, x, edge_index):
        """Run every GNN layer, then the head, and return the class logits."""
        for gnn_layer in self.gnn_layers:
            x = gnn_layer(x, edge_index)

        # Head order: linear -> dropout -> batch norm -> ReLU -> linear.
        hidden = self.fc1(x)
        hidden = self.dropout(hidden)
        hidden = self.batch_norm_fc1(hidden)
        hidden = self.relu(hidden)
        return self.fc2(hidden)

    def forward_layer(self, x, edge_index, layer_idx):
        """Forward pass for a specific layer."""
        return self.gnn_layers[layer_idx](x, edge_index)

In [None]:
d = 768             # node feature size used by the GNN layers
h = 400             # hidden dimension of fully connected layer
c = 70              # number of classes
# NOTE(review): despite its name, this value fills GNNModel's `num_layers`
# slot (its fourth positional parameter), i.e. it sets the number of GNN layers.
num_relations = 1

# Model, Loss, Optimizer
model = GNNModel(d, h, c, num_layers=num_relations)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.AdamW(model.parameters(), lr=0.001)

In [None]:
def count_parameters_per_module(model):
    """Print the trainable-parameter count of every named sub-module.

    The model itself (empty name) is skipped, and so is any sub-module without
    trainable parameters. A container's count includes the parameters of the
    modules nested inside it.
    """
    print("Module and parameter counts:")

    for name, module in model.named_modules():
        is_submodule = name != "" and isinstance(module, nn.Module)
        if is_submodule:
            trainable = sum(
                param.numel() for param in module.parameters() if param.requires_grad
            )
            if trainable > 0:
                print(f"{name}: {trainable} parameters")

In [None]:
count_parameters_per_module(model)

Module and parameter counts:
gnn_layers: 1181184 parameters
gnn_layers.0: 1181184 parameters
gnn_layers.0.batch_norm: 1536 parameters
fc1: 307600 parameters
batch_norm_fc1: 800 parameters
fc2: 28070 parameters


## $\color{blue}{Sampling}$


In [None]:
from torch_geometric.loader import NeighborSampler
edge_index = train_data.edge_index

# Seed nodes for training: every node that appears at either end of an edge.
# (The earlier source-only torch.unique call was overwritten immediately and
# has been removed.)
linked_nodes = torch.unique(torch.cat([edge_index[0], edge_index[1]]))

# Mini-batch sampler over the training graph, seeded only from linked nodes.
train_sampler = NeighborSampler(
    train_data.edge_index,
    node_idx=linked_nodes,
    sizes=[4, 4],  # two sampling hops, 4 neighbours requested per hop
    batch_size=64,
    shuffle=True,
    num_workers=0,
)

In [None]:
# Validation sampler over the combined (dev + train) graph, visited in a single
# unshuffled pass seeded from node ids 0..963.
# NOTE(review): 964 is presumably the number of dev rows, which are placed
# first when df_val is built — confirm. Unlike the training sampler, the seeds
# here are NOT restricted to linked nodes.
val_sampler = NeighborSampler(
  val_data.edge_index,
  node_idx=torch.arange(964),  # seed nodes: ids 0..963
  sizes=[4,4],
  batch_size=1024,
  shuffle=False,
  num_workers=0
)

In [None]:
# Peek at the first mini-batch produced by the training sampler, then stop.
count = 0

for batch_size, n_id, adj in train_sampler:
    print(f'batch size: {batch_size}')
    print(f'n_id: {n_id}')
    print(f'adj: {adj}')
    count += 1
    break


batch size: 64
n_id: tensor([ 2211, 10176,  7232,  ...,  4776, 11727,  5119])
adj: [EdgeIndex(edge_index=tensor([[  64,   67,  296,  ..., 1121, 1124, 1125],
        [   0,    1,    1,  ...,  295,  295,  295]]), e_id=tensor([890686, 293500, 597303,  ..., 374045, 367853, 919514]), size=(1127, 296)), EdgeIndex(edge_index=tensor([[ 64,  65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,
          78,  79,  80,  81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,
          92,  93,  94,  95,  96,  97,  98,  99, 100, 101, 102, 103, 104, 105,
         106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116,  86, 117, 118,
         119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132,
         133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146,
         147, 148, 149, 150, 151,  69, 152, 153, 154, 155, 156, 157, 158, 159,
         160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173,
         174, 175, 176, 177, 178, 179, 180, 181,

## $\color{blue}{Train-Validate}$


In [None]:
def accuracy(outputs, labels):
    """Return the fraction of rows in ``outputs`` whose arg-max equals the label.

    Args:
        outputs: 2-D tensor of per-class scores, one row per sample.
        labels: 1-D tensor of true class indices, one per sample.

    Returns:
        Correct predictions divided by ``labels.size(0)``, as a Python float.
    """
    predicted = torch.max(outputs, 1).indices
    num_correct = (predicted == labels).sum().item()
    return num_correct / labels.size(0)

In [None]:
def extract_edge_index(adj):
    """Merge the per-hop edge indices of a sampled batch into one tensor.

    Args:
        adj: Iterable of objects exposing an ``edge_index`` attribute whose
            first row holds source indices and second row target indices.

    Returns:
        A tensor with two rows: all sources concatenated in hop order on
        row 0 and the matching targets on row 1.
    """
    sources = torch.cat([hop.edge_index[0] for hop in adj], dim=0)
    targets = torch.cat([hop.edge_index[1] for hop in adj], dim=0)
    return torch.stack([sources, targets], dim=0)

In [None]:
import numpy as np

def train(model, sampler, criterion, optimizer):
    """Run one training epoch over the mini-batches yielded by ``sampler``.

    Features and labels are read from the module-level ``train_data`` for every
    node id in the batch's ``n_id``, so the loss and accuracy cover all sampled
    nodes of the batch.

    Args:
        model: Network called as ``model(x, edge_index)``.
        sampler: Iterable yielding ``(batch_size, n_id, adj)`` triples.
        criterion: Loss function called as ``criterion(out, y)``.
        optimizer: Optimizer updating the model parameters.

    Returns:
        Tuple ``(mean batch loss, mean batch accuracy)`` for the epoch.
    """
    model.train()
    batch_losses, batch_accuracies = [], []

    for batch_size, n_id, adj in sampler:
        optimizer.zero_grad()

        features = train_data.x[n_id].to(device)
        merged_edges = extract_edge_index(adj).to(device)
        logits = model(features, merged_edges)
        targets = train_data.y[n_id].to(device)

        loss = criterion(logits, targets)
        batch_losses.append(loss.item())
        batch_accuracies.append(accuracy(logits, targets))

        # Backpropagation and optimization
        loss.backward()
        optimizer.step()

    return np.mean(batch_losses), np.mean(batch_accuracies)

In [None]:
def mode(lstr):
    """Return the most frequent value of ``lstr`` and its first position.

    Ties are resolved in favour of the smallest value, because the candidates
    come back sorted from ``np.unique`` and ``argmax`` keeps the first maximum.

    Args:
        lstr: Non-empty list of values.

    Returns:
        Tuple ``(mode value, index of its first occurrence in lstr)``.
    """
    values, frequencies = np.unique(lstr, return_counts=True)
    winner = values[frequencies.argmax()]
    return winner, lstr.index(winner)

In [None]:
import torch
from collections import defaultdict



def validate(model, sampler, criterion, num_eval_nodes=964):
    """
    Validate the model on the validation graph using the provided sampler.

    Only nodes whose id is below ``num_eval_nodes`` are scored. When such a
    node shows up in more than one batch, its predictions are grouped and the
    output belonging to the most frequent predicted class is the one kept.
    Features and labels are read from the module-level ``val_data``.

    Parameters:
    - model: The model to be evaluated, called as ``model(x, edge_index)``.
    - sampler: Iterable yielding ``(batch_size, n_id, adj)`` triples.
    - criterion: The loss function used for evaluation.
    - num_eval_nodes: Nodes with an id below this value are evaluated. The
      default keeps the value that was previously hard-coded.

    Returns:
    - dev_loss: The loss on the kept outputs, exactly as returned by
      ``criterion`` (a tensor; it is not converted to a float).
    - dev_accuracy: The accuracy on the kept outputs, as a float.
    """

    model.eval()

    aggregated_outputs = defaultdict(list)  # Raw model outputs per node id
    aggregated_predictions = defaultdict(list)  # Predicted class labels per node id
    aggregated_labels = defaultdict(list)  # True labels per node id

    with torch.no_grad():
        for batch_size, n_id, adj in sampler:
            edge_index = extract_edge_index(adj).to(device)
            x = val_data.x[n_id].to(device)
            out = model(x, edge_index)
            y = val_data.y[n_id].to(device)

            # Keep only the nodes that belong to the evaluated range
            mask = (n_id < num_eval_nodes)
            filtered_out = out[mask]
            filtered_labels = y[mask].tolist()

            # Determine the predicted class for each output
            _, predicted_classes = torch.max(filtered_out, dim=1)
            predicted_classes = predicted_classes.tolist()

            # Key the aggregation on plain ints: 0-dim tensors hash by object
            # identity, so using them as keys would never group a node's
            # repeated appearances together.
            for idx, node_id in enumerate(n_id[mask].tolist()):
                aggregated_outputs[node_id].append(filtered_out[idx])
                aggregated_predictions[node_id].append(predicted_classes[idx])
                aggregated_labels[node_id].append(filtered_labels[idx])

    final_labels = []
    final_outputs = []  # Outputs used for the loss calculation

    for node_id, predictions in aggregated_predictions.items():
        # Position of the first occurrence of the most common predicted class
        chosen = mode(predictions)[1]

        final_labels.append(aggregated_labels[node_id][chosen])
        final_outputs.append(aggregated_outputs[node_id][chosen])

    # Convert to tensors for loss computation
    final_labels = torch.tensor(final_labels).to(device)
    final_outputs = torch.stack(final_outputs).to(device)

    dev_loss = criterion(final_outputs, final_labels)
    dev_accuracy = accuracy(final_outputs, final_labels)

    return dev_loss, dev_accuracy

In [None]:
import time

def tv_run(epochs, model, lr, alpha, max_accuracy, path, verbose = 0):
    """
    Train and validate ``model`` and return the statistics of its best epoch.

    Training draws batches from the module-level ``train_sampler`` and
    validation from the module-level ``val_sampler``. The run stops early after
    6 consecutive epochs without a new best dev accuracy. Whenever the dev
    accuracy beats ``max_accuracy`` the model's state dict is saved to
    ``path``. Timing and peak-memory figures are printed for every epoch.

    Parameters:
    - epochs: Maximum number of epochs to run.
    - model: The model to train; it is moved to ``device`` first.
    - lr: Learning rate for AdamW.
    - alpha: Weight decay for AdamW.
    - max_accuracy: Best dev accuracy seen so far (e.g. across earlier runs);
      only better epochs are checkpointed.
    - path: File the best state dict is written to.
    - verbose: 1 -> print model results; 2 -> print epoch and model results.

    Returns:
    - A ``Stats`` tuple holding the losses/accuracies of the epoch with the
      highest dev accuracy, that epoch's number, ``lr``, ``alpha`` and the
      updated ``max_accuracy``.
    """
    model = model.to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.AdamW(model.parameters(), lr=lr, weight_decay=alpha)

    # Peak-memory counters are only touched when CUDA is usable, so the run
    # also works after `device` has fallen back to the CPU.
    cuda_available = torch.cuda.is_available()

    # Hold epoch stats
    train_losses = []
    train_accuracy = []
    dev_losses = []
    dev_accuracy = []
    epoch_holder = []

    # Break if no improvement
    current_best = 0
    no_improvement = 0

    # Run epochs
    for epoch in range(epochs):

        # break out of epochs
        if no_improvement >= 6:
            break

        # Training phase, with timing / peak-memory profiling.
        if cuda_available:
            torch.cuda.reset_peak_memory_stats()
        start_time = time.time()
        # `trace_sampler` is not defined anywhere in this notebook; train on
        # the sampler built in the Sampling section instead.
        train_loss, train_acc = train(model, train_sampler, criterion, optimizer)
        print("\n--- Profiling Results for Training Phase ---")
        training_time = time.time() - start_time
        max_train_memory = torch.cuda.max_memory_allocated() if cuda_available else 0
        print(f'Time: {training_time}\nMax memory: {max_train_memory}')

        # Validation phase, profiled the same way.
        if cuda_available:
            torch.cuda.reset_peak_memory_stats()
        start_time = time.time()
        print("\n--- Profiling Results for Validation Phase ---")
        dev_loss, dev_acc = validate(model, val_sampler, criterion)
        validation_time = time.time() - start_time
        max_validation_memory = torch.cuda.max_memory_allocated() if cuda_available else 0
        print(f'Time: {validation_time}\nMax memory: {max_validation_memory}')

        # Store epoch stats
        train_losses.append(train_loss)
        train_accuracy.append(train_acc)
        dev_losses.append(dev_loss)
        dev_accuracy.append(dev_acc)
        epoch_holder.append(epoch + 1)

        # check for improvement within this run (drives early stopping)
        if dev_acc > current_best:
            current_best = dev_acc
            no_improvement = 0
        else:
            no_improvement += 1

        # save best model (compared against the best accuracy passed in)
        if dev_acc > max_accuracy:
            torch.save(model.state_dict(), path)
            max_accuracy = dev_acc

        # optionally print epoch results
        if verbose == 2:
            print(f'\n --------- \nEpoch: {epoch + 1}\n')
            print(f'Epoch {epoch + 1} train loss: {train_loss:.4f}')
            print(f'Epoch {epoch + 1} train accuracy: {train_acc:.4f}')
            print(f'Epoch {epoch + 1} dev loss: {dev_loss:.4f}')
            print(f'Epoch {epoch + 1} dev accuracy: {dev_acc:.4f}')

    # Report the epoch with the highest dev accuracy.
    max_ind = np.argmax(dev_accuracy)

    stats = Stats(
        train_losses[max_ind],
        train_accuracy[max_ind],
        dev_losses[max_ind],
        dev_accuracy[max_ind],
        epoch_holder[max_ind],
        lr, alpha,
        max_accuracy
    )

    # optionally print model results
    if verbose in [1,2]:
        print('\n ######## \n')
        print(f'lr:{stats.lr}, alpha:{stats.alpha} @ epoch {stats.epoch}.')
        print(f'TL:{stats.train_loss}, TA:{stats.train_accuracy}.')
        print(f'DL:{stats.dev_loss}, DA:{stats.dev_accuracy}')

    return stats

#### $\color{red}{Sanity-check:}$

In [None]:
from collections import namedtuple
# Immutable record describing the best epoch of one training run.
Stats = namedtuple(
    'Stats',
    (
        'train_loss',
        'train_accuracy',
        'dev_loss',
        'dev_accuracy',
        'epoch',
        'lr',
        'alpha',
        'max_accuracy',
    ),
)

In [None]:
# Smoke test: a single epoch with per-epoch printing. The checkpoint goes to
# "binme" (presumably a throwaway file — confirm).
tv_run(epochs=1, model=model, lr=0.00003, alpha=0.0005, max_accuracy=0, path="binme", verbose=2)


--- Profiling Results for Training Phase ---
Time: 0.3142986297607422
Max memory: 212007424

--- Profiling Results for Validation Phase ---
Time: 0.07372665405273438
Max memory: 285145600

 --------- 
Epoch: 1

Epoch 1 train loss: 4.3206
Epoch 1 train accuracy: 0.0159
Epoch 1 dev loss: 4.2532
Epoch 1 dev accuracy: 0.0207

 ######## 

lr:3e-05, alpha:0.0005 @ epoch 1.
TL:4.320555246793306, TA:0.015879582113967398.
DL:4.253239631652832, DA:0.02074688796680498


Stats(train_loss=4.320555246793306, train_accuracy=0.015879582113967398, dev_loss=tensor(4.2532, device='cuda:0'), dev_accuracy=0.02074688796680498, epoch=1, lr=3e-05, alpha=0.0005, max_accuracy=0.02074688796680498)

In [None]:
def gen_config(lr_low, lr_high, alpha_low, alpha_high):
    """Draw a random (learning rate, weight decay) pair on a log10 scale.

    Each value is ``10 ** u`` with ``u`` drawn uniformly between its low and
    high exponent, rounded to 6 decimal places. NumPy's global generator is
    re-seeded on every call, so results are not reproducible.

    Args:
        lr_low, lr_high: log10 bounds for the learning rate.
        alpha_low, alpha_high: log10 bounds for the weight decay.

    Returns:
        Tuple ``(lr, alpha)`` of floats.
    """
    np.random.seed()
    lr_exponent = float(np.random.uniform(lr_low, lr_high))
    alpha_exponent = float(np.random.uniform(alpha_low, alpha_high))
    return round(10 ** lr_exponent, 6), round(10 ** alpha_exponent, 6)

In [None]:
def gen_ranges( lr, lr_range, alpha, alpha_range):
    """Return search bounds centred on ``lr`` and ``alpha``.

    Each interval spans its ``*_range`` in total, i.e. half of it on either
    side of the centre.

    Returns:
        Tuple ``(lr_low, lr_high, alpha_low, alpha_high)``.
    """
    lr_bounds = (lr - lr_range/2, lr + lr_range/2)
    alpha_bounds = (alpha - alpha_range/2, alpha + alpha_range/2)
    return (*lr_bounds, *alpha_bounds)

In [None]:
def search_stats(results):
    """Return the entry of ``results`` with the highest ``dev_accuracy``.

    The first entry wins on ties. ``None`` is returned when ``results`` is
    empty or no entry has a dev accuracy above 0.
    """
    best_stats = None
    max_dev_accuracy = 0
    for candidate in results:
        if candidate.dev_accuracy > max_dev_accuracy:
            best_stats, max_dev_accuracy = candidate, candidate.dev_accuracy
    return best_stats

In [None]:
# Coarse-to-fine random search over learning rate and weight decay:
# 3 rounds of 12 random configurations each; after every round the search
# window is shrunk and re-centred on the best configuration found so far.
"""
Main Admin
"""
epochs = 60
max_accuracy = 0  # best dev accuracy across ALL runs; gates checkpoint saving in tv_run
path = "class/models/GNN_geom.1.pt"
results = []

# NOTE(review): the string below mentions ranges up to 10^-1 and batch sizes,
# but the exponents set underneath it are -5..-3 and no batch size is varied
# in this cell.
"""
init random search
lr [10^-5 - 10^-1]
alpha [10^-5 - 10^-1]
bs [8, 32, 128]
"""
# Bounds are log10 exponents (gen_config raises 10 to the sampled value).
lr_low = -5
lr_high = -3
lr_range = lr_high - lr_low

alpha_low = -5
alpha_high = -3
alpha_range = alpha_high - alpha_low

d = 768
h = 400
c = 70
# NOTE(review): passed below as GNNModel's fourth positional argument, which is
# `num_layers` — so this sets the number of GNN layers to 2.
num_relations = 2

count = 0  # running number of configurations tried

"""
Hyperparameter Search
"""

for i in range(3):
  # debug
  print("\n################\n")
  print(f'round: {i}')
  # print(f'lr_low{lr_low}, lr_high{lr_high}, lr_range{lr_range}')
  # print(f'alpha_low{alpha_low}, lr_high{alpha_high}, lr_range{alpha_range}')
  print('max', max_accuracy)
  print("\n################\n")


  for j in range(12):
    count += 1
    print(count)

    # get config
    lr, alpha = gen_config(lr_low, lr_high, alpha_low, alpha_high)
    # define model (a fresh, re-initialised model for every configuration)
    model = GNNModel(d, h, c, num_relations)
    model = model.to(device)

    # run training
    res = tv_run(epochs, model, lr, alpha, max_accuracy, path, verbose = 1)
    max_accuracy = res.max_accuracy  # carry the global best into the next run
    results.append(res)

  # get best result of the round or even so far
  # (search_stats returns None if no run reached a dev accuracy above 0,
  # in which case the attribute accesses below would fail)
  stats = search_stats(results)


  print(stats) # debug

  # reconfigure the new hypers: shrink each window to a third of its width and
  # centre it on the best configuration, working in log10 space
  lr = np.log10(stats.lr)
  lr_range = lr_range / 3

  alpha = np.log10(stats.alpha)
  alpha_range = alpha_range / 3

  config = gen_ranges(lr, lr_range, alpha, alpha_range)
  lr_low, lr_high, alpha_low, alpha_high = config
  lr_range = lr_high - lr_low
  alpha_range = alpha_high - alpha_low



################

round: 0
max 0

################

1

 ######## 

lr:0.000336, alpha:0.000114 @ epoch 9.
TL:0.754186844165836, TA:0.7979960528766625.
DL:2.4498090744018555, DA:0.46265560165975106
2

 ######## 

lr:0.000105, alpha:1.3e-05 @ epoch 28.
TL:0.3983183858823031, TA:0.8944982536144503.
DL:2.672290563583374, DA:0.4896265560165975
3

 ######## 

lr:2.9e-05, alpha:0.000218 @ epoch 12.
TL:1.2118226741307547, TA:0.699926851022546.
DL:2.1563241481781006, DA:0.45850622406639
4

 ######## 

lr:1.3e-05, alpha:0.000124 @ epoch 29.
TL:1.104277127065829, TA:0.7273375425290917.
DL:2.122598886489868, DA:0.470954356846473
5

 ######## 

lr:0.000326, alpha:1.6e-05 @ epoch 15.
TL:0.5962365707196295, TA:0.8399614192225077.
DL:2.5478997230529785, DA:0.483402489626556
6

 ######## 

lr:0.000132, alpha:0.000899 @ epoch 8.
TL:0.821348932678146, TA:0.78892038595702.
DL:2.2576563358306885, DA:0.46265560165975106
7

 ######## 

lr:0.000386, alpha:1.4e-05 @ epoch 13.
TL:0.6596934408641287, TA:0.82139