In [1]:
import pandas as pd
import torch
from gnn_tracking.training.tc import TCModule
from pathlib import Path
from torch.profiler import profile, record_function, ProfilerActivity

# from object_condensation.pytorch.losses import condensation_loss

In [2]:
# --- Checkpoint location -------------------------------------------------
# Directory that holds the Lightning run folders; fail early if it is missing.
chkpt_home = Path(
    "/home/kl5675/Documents/23/git_sync/hyperparameter_optimization2/scripts/pixel/lightning_logs/"
)
assert chkpt_home.is_dir()
# Checkpoint file of the run that is profiled in this notebook.
chkpt_path = chkpt_home.joinpath(
    "vagabond-tasteful-hyrax/checkpoints_persist/epoch=451-step=406800.ckpt"
)
assert chkpt_path.is_file()

# --- Input data location -------------------------------------------------
# Directory that holds the point-cloud files; fail early if it is missing.
data_home = Path(
    "/scratch/gpfs/IOJALVO/gnn-tracking/object_condensation/point_clouds_v10/"
)
assert data_home.is_dir()
# Single file used as the profiling input.
data_path = data_home.joinpath("part_1", "data21000_s0.pt")
assert data_path.is_file()

In [3]:
# Run on the GPU when one is visible to torch, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

In [4]:
# Restore the trained module from the checkpoint file selected above.
# NOTE(review): map_location is presumably forwarded to the underlying
# deserialisation so that the restored tensors end up on `device` — confirm.
lmodel = TCModule.load_from_checkpoint(chkpt_path, map_location=device)

[08:43:28] DEBUG: Getting class PreTrainedECGraphTCN from module gnn_tracking.models.track_condensation_networks
  rank_zero_warn(
[08:43:29] DEBUG: Getting class MLGraphConstruction from module gnn_tracking.models.graph_construction
[08:43:29] DEBUG: Getting class GraphConstructionFCNN from module gnn_tracking.models.graph_construction
[08:43:29] DEBUG: Getting class PotentialLoss from module gnn_tracking.metrics.losses
[08:43:29] DEBUG: Getting class DBSCANHyperParamScanner from module gnn_tracking.postprocessing.dbscanscanner


In [7]:
# Load the input object directly onto the selected device. Passing
# map_location mirrors the checkpoint load and keeps the CPU fallback usable
# even if the file was written from a GPU process.
# NOTE: torch.load unpickles arbitrary objects; only use it on trusted files.
data = torch.load(data_path, map_location=device)
# Rebind the result: `.to` is only guaranteed to *return* the moved object,
# not to move the original in place.
data = data.to(device)
# Sanity check that the loaded object is truthy (i.e. not empty).
assert data
# Bare network wrapped by the restored module; used for the manual
# forward/backward pass further down.
model = lmodel.model

In [8]:
# Run the module's preprocessing step on the loaded data object; the result is
# what gets fed to `model` in the profiled forward pass below.
dp = lmodel.preproc(data)

In [39]:
from timeit import default_timer as timer


class MemLogger:
    """Record CUDA memory usage and elapsed wall-clock time between checkpoints.

    Every call to :meth:`log` appends one record with

    * ``desc``: the label passed by the caller,
    * ``Δmax``: peak allocated memory since the previous record, relative to
      the allocated memory at the previous record,
    * ``max``: peak allocated memory since the previous record,
    * ``Δpersistent``: change in currently allocated memory since the previous
      record,
    * ``persistent``: currently allocated memory,
    * ``Δtime``: seconds elapsed since the previous record (or construction).

    Memory values are in units of 1e9 bytes. When CUDA is not available all
    memory columns are reported as 0 and only the timing is meaningful.
    """

    def __init__(self):
        # Allocated memory (1e9 bytes) at the previous record.
        self.mem = 0
        self._cols = ["desc", "Δmax", "max", "Δpersistent", "persistent", "Δtime"]
        # Collected records, one tuple per `log` call, ordered like `_cols`.
        self.data = []
        self._active = True
        # Start of the interval that the next record reports.
        self.time = timer()

    def deactivate(self):
        """Turn subsequent :meth:`log` calls into no-ops."""
        self._active = False

    def activate(self):
        """Re-enable recording after :meth:`deactivate`."""
        self._active = True

    def log(self, desc=""):
        """Append a record labelled ``desc`` and start a new interval.

        Does nothing while the logger is deactivated.
        """
        if not self._active:
            return
        has_cuda = torch.cuda.is_available()
        if has_cuda:
            # CUDA kernels are launched asynchronously; wait for them so that
            # the elapsed time covers the work actually done in the interval.
            torch.cuda.synchronize()
            current = torch.cuda.memory_allocated() / 1e9
            current_max = torch.cuda.max_memory_allocated() / 1e9
        else:
            # No CUDA allocator to query; keep the logger usable on CPU.
            current = 0.0
            current_max = 0.0
        elapsed = timer() - self.time
        added = current - self.mem
        added_max = current_max - self.mem
        self.data.append((desc, added_max, current_max, added, current, elapsed))
        self.mem = current
        if has_cuda:
            # Release cached blocks and restart peak tracking so that the next
            # record reports the peak of its own interval only.
            torch.cuda.empty_cache()
            torch.cuda.reset_peak_memory_stats()
        # Restart the clock last so the bookkeeping above is not attributed
        # to the next interval.
        self.time = timer()

    def get_df(self):
        """Return all records as a ``pandas.DataFrame`` (one row per record)."""
        return pd.DataFrame.from_records(self.data, columns=self._cols)

In [47]:
from torch import Tensor as T
from torch.nn.functional import relu


# @torch.compile
def condensation_loss_tiger(
    *,
    beta: T,
    x: T,
    object_id: T,
    weights: T,
    q_min: float,
    noise_threshold: int,
    max_n_rep: int,
) -> dict[str, T]:
    """Compute the terms of a condensation loss with capped repulsive sampling.

    For every object (unique id above ``noise_threshold``) the hit with the
    largest charge ``q = arctanh(beta)**2 + q_min`` is selected as its
    representative. Hits of the same object are pulled towards the
    representative with a quadratic potential; hits of other objects closer
    than distance 1 to it are pushed away with a linear (hinge) potential.

    Args:
        beta: One value per hit. Must lie strictly inside (-1, 1) so that
            ``arctanh`` stays finite.
        x: Hit coordinates in the clustering space, ``n_hits x n_outdim``.
        object_id: Integer object label per hit. Hits with
            ``object_id <= noise_threshold`` are treated as noise.
        weights: Per-hit weight applied to the attractive and repulsive terms.
        q_min: Offset added to every charge.
        noise_threshold: Largest id that still counts as noise.
        max_n_rep: Upper bound on the expected number of repulsive
            (hit, object) pairs that are evaluated. If more pairs fall within
            distance 1, a random subset is used and the normalisation is
            rescaled to compensate.

    Returns:
        Dictionary with the scalar tensors ``"attractive"``, ``"repulsive"``,
        ``"coward"``, ``"noise"`` and ``"n_rep"`` (number of repulsive pairs
        within distance 1 before any subsampling). ``"noise"`` is 0 when the
        input contains no noise hits.

    Raises:
        AssertionError: If there is no non-noise object or if a charge is NaN.
    """
    # To protect against nan in divisions
    eps = 1e-9

    # x: n_nodes x n_outdim
    not_noise = object_id > noise_threshold
    unique_oids = torch.unique(object_id[not_noise])
    assert len(unique_oids) > 0, "No particles found, cannot evaluate loss"
    # n_nodes x n_pids
    # The nodes in every column correspond to the hits of a single particle and
    # should attract each other
    attractive_mask = object_id.view(-1, 1) == unique_oids.view(1, -1)

    q = torch.arctanh(beta) ** 2 + q_min
    assert not torch.isnan(q).any(), "q contains NaNs"
    # n_objs: index of the highest-charge hit of every object
    alphas = torch.argmax(q.view(-1, 1) * attractive_mask, dim=0)

    # _j means indexed by hits
    # _k means indexed by objects

    # n_objs x n_outdim
    x_k = x[alphas]
    # 1 x n_objs
    q_k = q[alphas].view(1, -1)

    dist_j_k = torch.cdist(x, x_k)

    qw_j_k = weights.view(-1, 1) * q.view(-1, 1) * q_k

    # Normalise per object by its hit count, then average over objects
    att_norm_k = (attractive_mask.sum(dim=0) + eps) * len(unique_oids)
    qw_att = (qw_j_k / att_norm_k)[attractive_mask]

    # Attractive potential/loss
    v_att = (qw_att * torch.square(dist_j_k[attractive_mask])).sum()

    repulsive_mask = (~attractive_mask) & (dist_j_k < 1)
    n_rep_k = (~attractive_mask).sum(dim=0)
    n_rep = repulsive_mask.sum()
    # Don't normalize to repulsive_mask, it includes the dist < 1 count,
    # (less points within the radius 1 ball should translate to lower loss)
    rep_norm = (n_rep_k + eps) * len(unique_oids)
    if n_rep > max_n_rep:
        # Keep each repulsive pair with probability max_n_rep / n_rep ...
        sampling_freq = max_n_rep / n_rep
        sampling_mask = (
            torch.rand_like(repulsive_mask, dtype=torch.float16) < sampling_freq
        )
        repulsive_mask &= sampling_mask
        sampling_scale = n_rep / max_n_rep
        print(f"Sampling {sampling_scale} of repulsive points")
        # ... and shrink the normalisation by the same factor so that the
        # expected value of the repulsive term is unchanged.
        rep_norm *= sampling_freq
    qw_rep = (qw_j_k / rep_norm)[repulsive_mask]
    v_rep = (qw_rep * (1 - dist_j_k[repulsive_mask])).sum()

    l_coward = torch.mean(1 - beta[alphas])
    # Mean of beta over the noise hits. Dividing by a count clamped to at
    # least 1 yields 0 (instead of the NaN a mean over an empty selection
    # produces) when the input has no noise hits.
    noise_mask = ~not_noise
    l_noise = beta[noise_mask].sum() / noise_mask.sum().clamp(min=1)

    return {
        "attractive": v_att,
        "repulsive": v_rep,
        "coward": l_coward,
        "noise": l_noise,
        "n_rep": n_rep,
    }

In [25]:
# from importlib import reload
# import object_condensation.pytorch
# reload(object_condensation.pytorch.losses)
# from object_condensation.pytorch.losses import condensation_loss

In [26]:
import sys

# Make the test helpers of the local object_condensation checkout importable.
sys.path.append("/home/kl5675/Documents/23/git_sync/object_condensation")
from tests.loss_test_cases import generate_test_data
from tests.test_losses_torch import TorchCondensationMockData

td = generate_test_data()

# Convert the generated (numpy) test data into its torch counterpart.
td = TorchCondensationMockData.from_numpy(td)
# Evaluate the loss on the mock data. Only keyword arguments that the
# function actually declares are passed; an extra `ml=` argument would raise
# a TypeError because the signature is keyword-only without **kwargs.
cl = condensation_loss_tiger(
    beta=td.beta.squeeze(),
    x=td.x,
    object_id=td.object_id.squeeze(),
    weights=td.weights.squeeze(),
    q_min=td.q_min,
    noise_threshold=0,
    max_n_rep=1000_000,
)
cl

{'attractive': tensor(1.7951, dtype=torch.float64),
 'repulsive': tensor(1.9509, dtype=torch.float64),
 'coward': tensor(0.2157, dtype=torch.float64),
 'noise': tensor(0.7748, dtype=torch.float64),
 'n_rep': tensor(220768)}

Sampling 2.2076799869537354 of repulsive points

{'attractive': tensor(1.7951, dtype=torch.float64),
 'repulsive': tensor(1.9543, dtype=torch.float64),
 'coward': tensor(0.2157, dtype=torch.float64),
 'noise': tensor(0.7748, dtype=torch.float64),
 'n_rep': tensor(220768)}

In [51]:
# Profile a single optimisation step: a record is taken after every stage so
# that the resulting table attributes memory and time to the forward pass,
# the loss evaluation, the backward pass and the optimizer update.
# The statement order is part of the measurement; do not reorder.
ml = MemLogger()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
# Baseline: whatever is already allocated before the forward pass.
ml.log("empty")
out = model(dp)
ml.log("model evaluated")
loss = condensation_loss_tiger(
    beta=out["B"],
    x=out["H"],
    object_id=data.particle_id,
    q_min=0.1,
    noise_threshold=0,
    # Uniform per-hit weights.
    weights=torch.ones_like(data.particle_id),
    max_n_rep=100_000,
)
# "n_rep" is a diagnostic count and deliberately not part of the summed loss.
total_loss = loss["attractive"] + loss["repulsive"] + loss["noise"] + loss["coward"]
ml.log("loss evaluated")
optimizer.zero_grad()
total_loss.backward()
ml.log("backward evaluated")
optimizer.step()
ml.log("Optimizer stepped")
print(total_loss, loss["n_rep"])

Sampling 832.9146118164062 of repulsive points
tensor(0.8953, device='cuda:0', grad_fn=<AddBackward0>) tensor(83291466, device='cuda:0')


In [52]:
import pandas as pd

# Display the per-stage memory/time records collected during the profiled
# training step as a table (memory columns are in units of 1e9 bytes).
ml.get_df()

Unnamed: 0,desc,Δmax,max,Δpersistent,persistent,Δtime
0,empty,22.180506,22.180506,22.165249,22.165249,0.001223
1,model evaluated,25.827639,47.992888,23.333476,45.498725,0.236748
2,loss evaluated,26.004328,71.503054,8.212371,53.711096,0.203532
3,backward evaluated,19.140668,72.851764,-31.545847,22.165249,0.499658
4,Optimizer stepped,0.030507,22.195756,0.015254,22.180503,0.39012


```
tiger:

Description                    |        Δ |        Σ |    Δ max |    Σ max
empty                          |     0.17 |     0.17 |     0.18 |     0.18
model evaluated                |    14.75 |    14.92 |    16.38 |    16.55
loss evaluated                 |     4.54 |    19.46 |    12.30 |    27.21
backward evaluated             |   -19.29 |     0.17 |     8.84 |    28.30
step done                      |     0.02 |     0.18 |     0.03 |     0.20


default: 

Description                    |        Δ |        Σ |    Δ max |    Σ max
empty                          |     0.17 |     0.17 |     0.18 |     0.18
model evaluated                |    14.75 |    14.92 |    16.38 |    16.55
loss evaluated                 |    14.18 |    29.10 |    25.19 |    40.11
backward evaluated             |   -28.93 |     0.17 |     5.15 |    34.25
step done                      |     0.02 |     0.18 |     0.03 |     0.20


---

empty                          added     0.14 GB, total     0.14 GB
model evaluated                added    14.76 GB, total    14.90 GB
loss evaluated                 added     3.87 GB, total    18.77 GB
backward done evaluated        added   -18.60 GB, total     0.17 GB
step done                      added     0.02 GB, total     0.18 GB


empty                          added    25.88 GB, total    25.88 GB
model evaluated                added    14.76 GB, total    40.63 GB
loss evaluated                 added    14.18 GB, total    54.82 GB
backward done evaluated        added   -28.93 GB, total    25.88 GB
step done                      added     0.02 GB, total    25.90 GB
```