In [1]:
import logging
import os
import sys
import traceback

import torch

from hydra import compose, initialize_config_module
from hydra.core.global_hydra import GlobalHydra
from hydra.utils import instantiate

from omegaconf import OmegaConf, open_dict

from training.utils.train_utils import makedir, register_omegaconf_resolvers

# Exported before any Hydra call is made in this notebook.
# NOTE(review): per Hydra's documented convention, HYDRA_FULL_ERROR=1 requests
# complete stack traces instead of the shortened error summary.
os.environ["HYDRA_FULL_ERROR"] = "1"

In [2]:
def single_proc_run(local_rank, main_port, cfg, world_size):
    """Export the distributed-run environment variables, then build and run the trainer.

    Args:
        local_rank: Rank of this process; exported as both ``RANK`` and
            ``LOCAL_RANK``.
        main_port: Port exported as ``MASTER_PORT`` (``MASTER_ADDR`` is always
            ``localhost``).
        cfg: Config object whose ``trainer`` node is passed to ``instantiate``.
        world_size: Exported as ``WORLD_SIZE``.
    """
    os.environ.update(
        {
            "MASTER_ADDR": "localhost",
            "MASTER_PORT": str(main_port),
            "RANK": str(local_rank),
            "LOCAL_RANK": str(local_rank),
            "WORLD_SIZE": str(world_size),
        }
    )

    # Best effort: a failure to register the resolvers is logged, not raised.
    try:
        register_omegaconf_resolvers()
    except Exception as err:
        logging.info(err)

    # `_recursive_=False` is forwarded to `instantiate` for the trainer node.
    instantiate(cfg.trainer, _recursive_=False).run()


def single_node_runner(cfg, main_port: int):
    """Run training in the current process as rank 0 of a world of size 1.

    Args:
        cfg: Config forwarded unchanged to ``single_proc_run``.
        main_port: Port forwarded to ``single_proc_run`` as ``main_port``.
    """
    # CUDA runtime does not support `fork`, so processes must be spawned.
    # `set_start_method` raises RuntimeError once a start method has been
    # fixed, which is exactly what happens when this is called a second time
    # in the same kernel. Only (re)configure when "spawn" is not yet in effect.
    if torch.multiprocessing.get_start_method(allow_none=True) != "spawn":
        torch.multiprocessing.set_start_method("spawn", force=True)

    single_proc_run(local_rank=0, main_port=main_port, cfg=cfg, world_size=1)


def format_exception(e: Exception, limit=20):
    """Render an exception as ``"<Type>: <message>"`` followed by its traceback.

    Args:
        e: The exception to describe; its ``__traceback__`` supplies the frames.
        limit: Maximum number of traceback entries, passed to
            ``traceback.format_tb``.

    Returns:
        A string with the header line, a ``Traceback:`` line, and the
        concatenated formatted traceback entries.
    """
    frames = traceback.format_tb(e.__traceback__, limit=limit)
    header = "{}: {}".format(type(e).__name__, e)
    return "\n".join((header, "Traceback:", "".join(frames)))


def add_pythonpath_to_sys_path():
    """Place the entries of ``PYTHONPATH`` at the front of ``sys.path``.

    Does nothing when ``PYTHONPATH`` is unset, empty, or contains only empty
    entries. Entries keep their ``PYTHONPATH`` order. The call is idempotent:
    an entry already present in ``sys.path`` is moved to the front rather than
    added a second time, so re-running the calling cell does not grow
    ``sys.path``.
    """
    raw_value = os.environ.get("PYTHONPATH")
    if not raw_value:
        return

    # Split on the platform separator (":" on POSIX, ";" on Windows), drop empty
    # segments, and de-duplicate while preserving first-seen order.
    entries = list(dict.fromkeys(p for p in raw_value.split(os.pathsep) if p))
    if not entries:
        return

    front = set(entries)
    # Update in place so every existing reference to `sys.path` sees the change.
    sys.path[:] = entries + [p for p in sys.path if p not in front]



In [3]:
# Hydra keeps one global initialization per process and refuses to initialize
# twice, so drop any state left by an earlier run of this cell first. This keeps
# the cell re-runnable without restarting the kernel.
if GlobalHydra.instance().is_initialized():
    GlobalHydra.instance().clear()
initialize_config_module("sam2", version_base="1.2")

try:
    register_omegaconf_resolvers()
except ValueError as e:
    # NOTE(review): presumably raised because the resolvers were already
    # registered by a previous run of this cell — confirm against
    # register_omegaconf_resolvers. Logged rather than raised, matching the
    # handling in single_proc_run.
    logging.info(e)

In [4]:
# Name of the YAML config that `compose` loads into `cfg`.
TRAIN_CONFIG_NAME = "configs/sam2.1_training/sam2.1_hiera_b+_MOSE_finetune.yaml"
cfg = compose(config_name=TRAIN_CONFIG_NAME)

In [5]:
# Add two keys to the first training dataset's `video_dataset` node. The writes
# happen inside `open_dict(cfg)` because these keys are being newly added.
with open_dict(cfg):
    video_dataset_cfg = (
        cfg.trainer.data.train.datasets[0].dataset.datasets[0].video_dataset
    )
    video_dataset_cfg.is_palette = False
    video_dataset_cfg.single_object_mode = True

In [6]:
# Customize the config

# Machine-specific locations are defined once here, so relocating the project or
# the dataset means editing a single line instead of four path literals.
PROJECT_ROOT = "/home/kasm-user/sam2_ft_runpod"
DATASET_ROOT = f"{PROJECT_ROOT}/prepped_mini_dataset_png_fixed"

# Training schedule and launcher topology (single node, single GPU).
cfg.scratch.max_num_objects = 3
cfg.scratch.num_epochs = 20
cfg.launcher.gpus_per_node = 1
cfg.launcher.num_nodes = 1

# Dataset inputs.
cfg.dataset.img_folder = f"{DATASET_ROOT}/images"
cfg.dataset.gt_folder = f"{DATASET_ROOT}/annotations"
cfg.dataset.file_list_txt = f"{DATASET_ROOT}/list_files.txt"

# Checkpoint path handed to the model weight initializer.
cfg.trainer.checkpoint.model_weight_initializer.state_dict.checkpoint_path = (
    f"{PROJECT_ROOT}/checkpoints/sam2.1_hiera_base_plus.pt"
)

In [8]:
# Fall back to <cwd>/sam2_logs/experiment_log_dir when the config leaves the
# experiment log directory unset.
launcher_cfg = cfg.launcher
if launcher_cfg.experiment_log_dir is None:
    default_log_dir = os.path.join(os.getcwd(), "sam2_logs", "experiment_log_dir")
    launcher_cfg.experiment_log_dir = default_log_dir

In [9]:
# Dump the composed config as YAML between two banner lines.
print(
    "###################### Train App Config ####################",
    OmegaConf.to_yaml(cfg),
    "############################################################",
    sep="\n",
)

###################### Train App Config ####################
scratch:
  resolution: 1024
  train_batch_size: 1
  num_train_workers: 10
  num_frames: 8
  max_num_objects: 3
  base_lr: 5.0e-06
  vision_lr: 3.0e-06
  phases_per_epoch: 1
  num_epochs: 20
dataset:
  img_folder: /home/kasm-user/sam2_ft_runpod/prepped_mini_dataset_png_fixed/images
  gt_folder: /home/kasm-user/sam2_ft_runpod/prepped_mini_dataset_png_fixed/annotations
  file_list_txt: /home/kasm-user/sam2_ft_runpod/prepped_mini_dataset_png_fixed/list_files.txt
  multiplier: 2
vos:
  train_transforms:
  - _target_: training.dataset.transforms.ComposeAPI
    transforms:
    - _target_: training.dataset.transforms.RandomHorizontalFlip
      consistent_transform: true
    - _target_: training.dataset.transforms.RandomAffine
      degrees: 25
      shear: 20
      image_interpolation: bilinear
      consistent_transform: true
    - _target_: training.dataset.transforms.RandomResizeAPI
      sizes: ${scratch.resolution}
      square:

In [10]:
# Put the PYTHONPATH entries at the front of sys.path, then call `makedir`
# (imported from training.utils.train_utils) on the experiment log directory.
# `makedir(...)` is the cell's last expression, so its return value is what the
# cell displays.
add_pythonpath_to_sys_path()
makedir(cfg.launcher.experiment_log_dir)

True

In [11]:
# Launch training in this process; 4500 is exported as MASTER_PORT downstream.
single_node_runner(cfg, main_port=4500)

INFO 2025-02-12 19:05:50,831 train_utils.py: 108: MACHINE SEED: 2460
INFO 2025-02-12 19:05:50,834 train_utils.py: 154: Logging ENV_VARIABLES
INFO 2025-02-12 19:05:50,835 train_utils.py: 155: AUDIO_PORT=4901
CLICOLOR=1
CLICOLOR_FORCE=1
COLORTERM=truecolor
CONDA_DEFAULT_ENV=sam2_ft
CONDA_EXE=/home/kasm-user/miniconda3/bin/conda
CONDA_PREFIX=/home/kasm-user/miniconda3/envs/sam2_ft
CONDA_PREFIX_1=/home/kasm-user/miniconda3
CONDA_PROMPT_MODIFIER=(sam2_ft) 
CONDA_PYTHON_EXE=/home/kasm-user/miniconda3/bin/python
CONDA_SHLVL=2
CUDA_MODULE_LOADING=LAZY
DBUS_SESSION_BUS_ADDRESS=unix:abstract=/tmp/dbus-S7FLjLiiqz,guid=19cd8a6fabda7cf5f433be6667acef54
DEBIAN_FRONTEND=noninteractive
DESKTOP_SESSION=xfce
DISPLAY=:1.0
DISTRO=ubuntu
FORCE_COLOR=1
GIT_PAGER=cat
GOMP_SPINCOUNT=0
HOME=/home/kasm-user
HOSTNAME=51b4585ea9f4
HYDRA_FULL_ERROR=1
INST_SCRIPTS=/dockerstartup/install
JPY_PARENT_PID=6505
JPY_SESSION_NAME=/home/kasm-user/sam2_ft_runpod/training_sav_png_fixed/train.ipynb
JUPYTER_PASSWORD=<redacted>



INFO 2025-02-12 19:06:01,606 train_utils.py: 271: Train Epoch: [0][ 0/46] | Batch Time: 8.75 (8.75) | Data Time: 5.83 (5.83) | Mem (GB): 27.00 (27.00/27.00) | Time Elapsed: 00d 00h 00m | Losses/train_all_loss: 3.49e-01 (3.49e-01)


grad.sizes() = [64, 256, 1, 1], strides() = [256, 1, 256, 256]
bucket_view.sizes() = [64, 256, 1, 1], strides() = [256, 1, 1, 1] (Triggered internally at /pytorch/torch/csrc/distributed/c10d/reducer.cpp:327.)
  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


INFO 2025-02-12 19:06:10,953 train_utils.py: 271: Train Epoch: [0][10/46] | Batch Time: 0.97 (1.65) | Data Time: 0.00 (0.53) | Mem (GB): 29.00 (28.45/30.00) | Time Elapsed: 00d 00h 00m | Losses/train_all_loss: 1.56e+00 (1.13e+00)




INFO 2025-02-12 19:06:19,565 train_utils.py: 271: Train Epoch: [0][20/46] | Batch Time: 0.81 (1.27) | Data Time: 0.00 (0.28) | Mem (GB): 28.00 (28.43/30.00) | Time Elapsed: 00d 00h 00m | Losses/train_all_loss: 2.96e-01 (1.15e+00)
INFO 2025-02-12 19:06:29,407 train_utils.py: 271: Train Epoch: [0][30/46] | Batch Time: 0.80 (1.18) | Data Time: 0.00 (0.19) | Mem (GB): 28.00 (28.58/30.00) | Time Elapsed: 00d 00h 00m | Losses/train_all_loss: 2.20e-01 (1.28e+00)
INFO 2025-02-12 19:06:38,739 train_utils.py: 271: Train Epoch: [0][40/46] | Batch Time: 0.78 (1.12) | Data Time: 0.00 (0.14) | Mem (GB): 28.00 (28.59/30.00) | Time Elapsed: 00d 00h 00m | Losses/train_all_loss: 5.56e-01 (1.50e+00)
INFO 2025-02-12 19:06:43,982 trainer.py: 950: Estimated time remaining: 00d 00h 15m
INFO 2025-02-12 19:06:43,984 trainer.py: 892: Synchronizing meters
INFO 2025-02-12 19:06:43,985 trainer.py: 830: Losses and meters: {'Losses/train_all_loss': 1.4013368252502836, 'Losses/train_all_loss_mask': 0.0101015548348004



INFO 2025-02-12 19:06:50,776 train_utils.py: 271: Train Epoch: [1][ 0/46] | Batch Time: 5.50 (5.50) | Data Time: 4.68 (4.68) | Mem (GB): 28.00 (28.00/28.00) | Time Elapsed: 00d 00h 00m | Losses/train_all_loss: 5.38e-01 (5.38e-01)
INFO 2025-02-12 19:07:00,627 train_utils.py: 271: Train Epoch: [1][10/46] | Batch Time: 1.14 (1.40) | Data Time: 0.00 (0.43) | Mem (GB): 29.00 (28.55/30.00) | Time Elapsed: 00d 00h 01m | Losses/train_all_loss: 1.26e+00 (1.52e+00)




INFO 2025-02-12 19:07:09,601 train_utils.py: 271: Train Epoch: [1][20/46] | Batch Time: 0.79 (1.16) | Data Time: 0.00 (0.22) | Mem (GB): 28.00 (28.48/30.00) | Time Elapsed: 00d 00h 01m | Losses/train_all_loss: 3.24e-01 (1.32e+00)
INFO 2025-02-12 19:07:18,547 train_utils.py: 271: Train Epoch: [1][30/46] | Batch Time: 0.79 (1.07) | Data Time: 0.00 (0.15) | Mem (GB): 28.00 (28.52/30.00) | Time Elapsed: 00d 00h 01m | Losses/train_all_loss: 8.08e-02 (1.12e+00)
INFO 2025-02-12 19:07:27,388 train_utils.py: 271: Train Epoch: [1][40/46] | Batch Time: 0.78 (1.03) | Data Time: 0.00 (0.12) | Mem (GB): 28.00 (28.54/30.00) | Time Elapsed: 00d 00h 01m | Losses/train_all_loss: 1.34e-01 (1.34e+00)
INFO 2025-02-12 19:07:33,499 trainer.py: 950: Estimated time remaining: 00d 00h 14m
INFO 2025-02-12 19:07:33,501 trainer.py: 892: Synchronizing meters
INFO 2025-02-12 19:07:33,502 trainer.py: 830: Losses and meters: {'Losses/train_all_loss': 1.2975476899224778, 'Losses/train_all_loss_mask': 0.0088143936426738



INFO 2025-02-12 19:07:41,742 train_utils.py: 271: Train Epoch: [2][ 0/46] | Batch Time: 6.78 (6.78) | Data Time: 5.97 (5.97) | Mem (GB): 28.00 (28.00/28.00) | Time Elapsed: 00d 00h 01m | Losses/train_all_loss: 1.19e-01 (1.19e-01)
INFO 2025-02-12 19:07:50,629 train_utils.py: 271: Train Epoch: [2][10/46] | Batch Time: 1.00 (1.42) | Data Time: 0.00 (0.54) | Mem (GB): 29.00 (28.45/30.00) | Time Elapsed: 00d 00h 01m | Losses/train_all_loss: 2.67e+00 (6.95e-01)




INFO 2025-02-12 19:08:00,938 train_utils.py: 271: Train Epoch: [2][20/46] | Batch Time: 1.12 (1.24) | Data Time: 0.00 (0.29) | Mem (GB): 30.00 (28.81/30.00) | Time Elapsed: 00d 00h 02m | Losses/train_all_loss: 2.15e+00 (1.04e+00)




INFO 2025-02-12 19:08:09,794 train_utils.py: 271: Train Epoch: [2][30/46] | Batch Time: 0.94 (1.12) | Data Time: 0.00 (0.19) | Mem (GB): 29.00 (28.71/30.00) | Time Elapsed: 00d 00h 02m | Losses/train_all_loss: 7.46e-01 (1.13e+00)
INFO 2025-02-12 19:08:18,923 train_utils.py: 271: Train Epoch: [2][40/46] | Batch Time: 0.75 (1.07) | Data Time: 0.00 (0.15) | Mem (GB): 28.00 (28.68/30.00) | Time Elapsed: 00d 00h 02m | Losses/train_all_loss: 4.96e-02 (1.16e+00)
INFO 2025-02-12 19:08:24,826 trainer.py: 950: Estimated time remaining: 00d 00h 13m
INFO 2025-02-12 19:08:24,828 trainer.py: 892: Synchronizing meters
INFO 2025-02-12 19:08:24,829 trainer.py: 830: Losses and meters: {'Losses/train_all_loss': 1.168295837452878, 'Losses/train_all_loss_mask': 0.011670781789491783, 'Losses/train_all_loss_dice': 0.5577969525171362, 'Losses/train_all_loss_iou': 0.255521372810978, 'Losses/train_all_loss_class': 0.12156187323990292, 'Losses/train_all_core_loss': 1.168295837452878, 'Trainer/where': 0.148913043



INFO 2025-02-12 19:08:31,792 train_utils.py: 271: Train Epoch: [3][ 0/46] | Batch Time: 5.60 (5.60) | Data Time: 4.43 (4.43) | Mem (GB): 30.00 (30.00/30.00) | Time Elapsed: 00d 00h 02m | Losses/train_all_loss: 3.78e+00 (3.78e+00)
INFO 2025-02-12 19:08:41,006 train_utils.py: 271: Train Epoch: [3][10/46] | Batch Time: 0.97 (1.35) | Data Time: 0.00 (0.40) | Mem (GB): 29.00 (28.73/30.00) | Time Elapsed: 00d 00h 02m | Losses/train_all_loss: 3.67e+00 (1.25e+00)
INFO 2025-02-12 19:08:50,617 train_utils.py: 271: Train Epoch: [3][20/46] | Batch Time: 0.76 (1.16) | Data Time: 0.00 (0.21) | Mem (GB): 28.00 (28.71/30.00) | Time Elapsed: 00d 00h 02m | Losses/train_all_loss: 1.81e-01 (1.28e+00)




INFO 2025-02-12 19:08:59,531 train_utils.py: 271: Train Epoch: [3][30/46] | Batch Time: 1.17 (1.08) | Data Time: 0.00 (0.14) | Mem (GB): 30.00 (28.65/30.00) | Time Elapsed: 00d 00h 03m | Losses/train_all_loss: 4.10e-01 (1.11e+00)
INFO 2025-02-12 19:09:09,285 train_utils.py: 271: Train Epoch: [3][40/46] | Batch Time: 0.83 (1.05) | Data Time: 0.00 (0.11) | Mem (GB): 28.00 (28.68/30.00) | Time Elapsed: 00d 00h 03m | Losses/train_all_loss: 7.40e-02 (9.95e-01)
INFO 2025-02-12 19:09:14,711 trainer.py: 950: Estimated time remaining: 00d 00h 12m
INFO 2025-02-12 19:09:14,713 trainer.py: 892: Synchronizing meters
INFO 2025-02-12 19:09:14,714 trainer.py: 830: Losses and meters: {'Losses/train_all_loss': 1.0709656847883826, 'Losses/train_all_loss_mask': 0.007126032660143328, 'Losses/train_all_loss_dice': 0.5639036129350248, 'Losses/train_all_loss_iou': 0.2817036507286779, 'Losses/train_all_loss_class': 0.08283778894389363, 'Losses/train_all_core_loss': 1.0709656847883826, 'Trainer/where': 0.198913



INFO 2025-02-12 19:09:21,565 train_utils.py: 271: Train Epoch: [4][ 0/46] | Batch Time: 5.51 (5.51) | Data Time: 4.66 (4.66) | Mem (GB): 28.00 (28.00/28.00) | Time Elapsed: 00d 00h 03m | Losses/train_all_loss: 2.77e-01 (2.77e-01)
INFO 2025-02-12 19:09:31,294 train_utils.py: 271: Train Epoch: [4][10/46] | Batch Time: 1.18 (1.38) | Data Time: 0.00 (0.45) | Mem (GB): 29.00 (28.55/30.00) | Time Elapsed: 00d 00h 03m | Losses/train_all_loss: 3.04e-01 (9.18e-01)
INFO 2025-02-12 19:09:41,329 train_utils.py: 271: Train Epoch: [4][20/46] | Batch Time: 1.16 (1.20) | Data Time: 0.00 (0.23) | Mem (GB): 29.00 (28.76/30.00) | Time Elapsed: 00d 00h 03m | Losses/train_all_loss: 2.79e+00 (1.14e+00)




INFO 2025-02-12 19:09:50,721 train_utils.py: 271: Train Epoch: [4][30/46] | Batch Time: 1.12 (1.12) | Data Time: 0.00 (0.16) | Mem (GB): 30.00 (28.74/30.00) | Time Elapsed: 00d 00h 03m | Losses/train_all_loss: 2.46e+00 (1.26e+00)
INFO 2025-02-12 19:09:59,213 train_utils.py: 271: Train Epoch: [4][40/46] | Batch Time: 0.97 (1.05) | Data Time: 0.00 (0.12) | Mem (GB): 29.00 (28.66/30.00) | Time Elapsed: 00d 00h 04m | Losses/train_all_loss: 1.33e+00 (1.18e+00)
INFO 2025-02-12 19:10:05,220 trainer.py: 950: Estimated time remaining: 00d 00h 12m
INFO 2025-02-12 19:10:05,223 trainer.py: 892: Synchronizing meters
INFO 2025-02-12 19:10:05,223 trainer.py: 830: Losses and meters: {'Losses/train_all_loss': 1.1314884902018567, 'Losses/train_all_loss_mask': 0.007317020718840902, 'Losses/train_all_loss_dice': 0.6268343484920004, 'Losses/train_all_loss_iou': 0.28825435957506945, 'Losses/train_all_loss_class': 0.07005937089349137, 'Losses/train_all_core_loss': 1.1314884902018567, 'Trainer/where': 0.24891



INFO 2025-02-12 19:10:42,817 train_utils.py: 271: Train Epoch: [5][30/46] | Batch Time: 0.78 (1.17) | Data Time: 0.00 (0.15) | Mem (GB): 28.00 (28.84/30.00) | Time Elapsed: 00d 00h 04m | Losses/train_all_loss: 1.83e-01 (1.01e+00)
INFO 2025-02-12 19:10:51,379 train_utils.py: 271: Train Epoch: [5][40/46] | Batch Time: 0.98 (1.09) | Data Time: 0.00 (0.12) | Mem (GB): 29.00 (28.71/30.00) | Time Elapsed: 00d 00h 05m | Losses/train_all_loss: 1.69e-01 (9.00e-01)
INFO 2025-02-12 19:10:57,327 trainer.py: 950: Estimated time remaining: 00d 00h 11m
INFO 2025-02-12 19:10:57,330 trainer.py: 892: Synchronizing meters
INFO 2025-02-12 19:10:57,330 trainer.py: 830: Losses and meters: {'Losses/train_all_loss': 0.9802756623729415, 'Losses/train_all_loss_mask': 0.007126330775426964, 'Losses/train_all_loss_dice': 0.5520999263162198, 'Losses/train_all_loss_iou': 0.22004301807559704, 'Losses/train_all_loss_class': 0.06560610019209889, 'Losses/train_all_core_loss': 0.9802756623729415, 'Trainer/where': 0.29891



INFO 2025-02-12 19:11:05,704 train_utils.py: 271: Train Epoch: [6][ 0/46] | Batch Time: 6.98 (6.98) | Data Time: 5.77 (5.77) | Mem (GB): 29.00 (29.00/29.00) | Time Elapsed: 00d 00h 05m | Losses/train_all_loss: 1.26e+00 (1.26e+00)
INFO 2025-02-12 19:11:14,874 train_utils.py: 271: Train Epoch: [6][10/46] | Batch Time: 0.97 (1.47) | Data Time: 0.00 (0.53) | Mem (GB): 29.00 (28.73/30.00) | Time Elapsed: 00d 00h 05m | Losses/train_all_loss: 1.20e+00 (8.59e-01)
INFO 2025-02-12 19:11:25,332 train_utils.py: 271: Train Epoch: [6][20/46] | Batch Time: 0.82 (1.27) | Data Time: 0.00 (0.28) | Mem (GB): 28.00 (28.86/30.00) | Time Elapsed: 00d 00h 05m | Losses/train_all_loss: 5.69e-01 (1.09e+00)
INFO 2025-02-12 19:11:34,954 train_utils.py: 271: Train Epoch: [6][30/46] | Batch Time: 1.17 (1.17) | Data Time: 0.00 (0.19) | Mem (GB): 30.00 (28.84/30.00) | Time Elapsed: 00d 00h 05m | Losses/train_all_loss: 1.44e+00 (1.03e+00)
INFO 2025-02-12 19:11:44,040 train_utils.py: 271: Train Epoch: [6][40/46] | Batc



INFO 2025-02-12 19:12:05,543 train_utils.py: 271: Train Epoch: [7][10/46] | Batch Time: 0.80 (1.36) | Data Time: 0.00 (0.52) | Mem (GB): 28.00 (28.18/30.00) | Time Elapsed: 00d 00h 06m | Losses/train_all_loss: 3.20e-01 (6.42e-01)
INFO 2025-02-12 19:12:15,505 train_utils.py: 271: Train Epoch: [7][20/46] | Batch Time: 1.13 (1.19) | Data Time: 0.00 (0.27) | Mem (GB): 30.00 (28.48/30.00) | Time Elapsed: 00d 00h 06m | Losses/train_all_loss: 1.54e+00 (8.69e-01)




INFO 2025-02-12 19:12:25,302 train_utils.py: 271: Train Epoch: [7][30/46] | Batch Time: 1.20 (1.12) | Data Time: 0.00 (0.19) | Mem (GB): 30.00 (28.58/30.00) | Time Elapsed: 00d 00h 06m | Losses/train_all_loss: 5.99e-01 (9.68e-01)
INFO 2025-02-12 19:12:34,364 train_utils.py: 271: Train Epoch: [7][40/46] | Batch Time: 1.22 (1.07) | Data Time: 0.00 (0.14) | Mem (GB): 29.00 (28.54/30.00) | Time Elapsed: 00d 00h 06m | Losses/train_all_loss: 1.09e+00 (8.64e-01)
INFO 2025-02-12 19:12:39,686 trainer.py: 950: Estimated time remaining: 00d 00h 09m
INFO 2025-02-12 19:12:39,688 trainer.py: 892: Synchronizing meters
INFO 2025-02-12 19:12:39,689 trainer.py: 830: Losses and meters: {'Losses/train_all_loss': 0.8107053305467834, 'Losses/train_all_loss_mask': 0.007405927927364879, 'Losses/train_all_loss_dice': 0.39357533143914264, 'Losses/train_all_loss_iou': 0.2286069194138374, 'Losses/train_all_loss_class': 0.04040452623300439, 'Losses/train_all_core_loss': 0.8107053305467834, 'Trainer/where': 0.39891



INFO 2025-02-12 19:13:37,877 train_utils.py: 271: Train Epoch: [9][ 0/46] | Batch Time: 5.51 (5.51) | Data Time: 4.66 (4.66) | Mem (GB): 28.00 (28.00/28.00) | Time Elapsed: 00d 00h 07m | Losses/train_all_loss: 4.36e-01 (4.36e-01)




INFO 2025-02-12 19:13:46,794 train_utils.py: 271: Train Epoch: [9][10/46] | Batch Time: 0.79 (1.31) | Data Time: 0.00 (0.42) | Mem (GB): 28.00 (28.45/30.00) | Time Elapsed: 00d 00h 07m | Losses/train_all_loss: 1.11e-01 (5.88e-01)
INFO 2025-02-12 19:13:56,437 train_utils.py: 271: Train Epoch: [9][20/46] | Batch Time: 1.13 (1.15) | Data Time: 0.00 (0.22) | Mem (GB): 29.00 (28.52/30.00) | Time Elapsed: 00d 00h 08m | Losses/train_all_loss: 2.52e+00 (1.04e+00)
INFO 2025-02-12 19:14:06,858 train_utils.py: 271: Train Epoch: [9][30/46] | Batch Time: 1.12 (1.11) | Data Time: 0.00 (0.15) | Mem (GB): 30.00 (28.68/30.00) | Time Elapsed: 00d 00h 08m | Losses/train_all_loss: 1.67e+00 (1.02e+00)
INFO 2025-02-12 19:14:16,102 train_utils.py: 271: Train Epoch: [9][40/46] | Batch Time: 0.79 (1.07) | Data Time: 0.00 (0.11) | Mem (GB): 28.00 (28.61/30.00) | Time Elapsed: 00d 00h 08m | Losses/train_all_loss: 4.89e-01 (1.00e+00)
INFO 2025-02-12 19:14:21,788 trainer.py: 950: Estimated time remaining: 00d 00h 



INFO 2025-02-12 19:14:29,483 train_utils.py: 271: Train Epoch: [10][ 0/46] | Batch Time: 6.32 (6.32) | Data Time: 5.49 (5.49) | Mem (GB): 28.00 (28.00/28.00) | Time Elapsed: 00d 00h 08m | Losses/train_all_loss: 8.22e-02 (8.22e-02)
INFO 2025-02-12 19:14:39,349 train_utils.py: 271: Train Epoch: [10][10/46] | Batch Time: 1.14 (1.47) | Data Time: 0.00 (0.50) | Mem (GB): 29.00 (28.64/30.00) | Time Elapsed: 00d 00h 08m | Losses/train_all_loss: 1.15e+00 (8.15e-01)




INFO 2025-02-12 19:14:48,431 train_utils.py: 271: Train Epoch: [10][20/46] | Batch Time: 0.77 (1.20) | Data Time: 0.00 (0.26) | Mem (GB): 28.00 (28.52/30.00) | Time Elapsed: 00d 00h 08m | Losses/train_all_loss: 1.42e-01 (7.10e-01)
INFO 2025-02-12 19:14:58,545 train_utils.py: 271: Train Epoch: [10][30/46] | Batch Time: 1.13 (1.14) | Data Time: 0.00 (0.18) | Mem (GB): 30.00 (28.71/30.00) | Time Elapsed: 00d 00h 09m | Losses/train_all_loss: 1.57e+00 (8.53e-01)
INFO 2025-02-12 19:15:07,682 train_utils.py: 271: Train Epoch: [10][40/46] | Batch Time: 1.18 (1.09) | Data Time: 0.00 (0.13) | Mem (GB): 30.00 (28.66/30.00) | Time Elapsed: 00d 00h 09m | Losses/train_all_loss: 1.39e+00 (8.96e-01)
INFO 2025-02-12 19:15:12,928 trainer.py: 950: Estimated time remaining: 00d 00h 07m
INFO 2025-02-12 19:15:12,930 trainer.py: 892: Synchronizing meters
INFO 2025-02-12 19:15:12,931 trainer.py: 830: Losses and meters: {'Losses/train_all_loss': 0.8961702939932761, 'Losses/train_all_loss_mask': 0.0070891400322



INFO 2025-02-12 19:16:22,355 train_utils.py: 271: Train Epoch: [12][10/46] | Batch Time: 1.15 (1.46) | Data Time: 0.00 (0.48) | Mem (GB): 29.00 (28.55/29.00) | Time Elapsed: 00d 00h 10m | Losses/train_all_loss: 2.89e+00 (8.70e-01)
INFO 2025-02-12 19:16:32,112 train_utils.py: 271: Train Epoch: [12][20/46] | Batch Time: 0.79 (1.23) | Data Time: 0.00 (0.25) | Mem (GB): 28.00 (28.71/30.00) | Time Elapsed: 00d 00h 10m | Losses/train_all_loss: 8.45e-01 (1.10e+00)




INFO 2025-02-12 19:16:42,085 train_utils.py: 271: Train Epoch: [12][30/46] | Batch Time: 0.98 (1.15) | Data Time: 0.00 (0.17) | Mem (GB): 29.00 (28.81/30.00) | Time Elapsed: 00d 00h 10m | Losses/train_all_loss: 2.06e+00 (1.09e+00)
INFO 2025-02-12 19:16:52,094 train_utils.py: 271: Train Epoch: [12][40/46] | Batch Time: 1.10 (1.12) | Data Time: 0.00 (0.13) | Mem (GB): 30.00 (28.90/30.00) | Time Elapsed: 00d 00h 11m | Losses/train_all_loss: 1.68e+00 (1.06e+00)
INFO 2025-02-12 19:16:57,377 trainer.py: 950: Estimated time remaining: 00d 00h 05m
INFO 2025-02-12 19:16:57,379 trainer.py: 892: Synchronizing meters
INFO 2025-02-12 19:16:57,380 trainer.py: 830: Losses and meters: {'Losses/train_all_loss': 1.009698307060677, 'Losses/train_all_loss_mask': 0.009370061628390429, 'Losses/train_all_loss_dice': 0.5178368182285972, 'Losses/train_all_loss_iou': 0.26190884259488917, 'Losses/train_all_loss_class': 0.04255140851390076, 'Losses/train_all_core_loss': 1.009698307060677, 'Trainer/where': 0.64891



INFO 2025-02-12 19:17:04,347 train_utils.py: 271: Train Epoch: [13][ 0/46] | Batch Time: 5.54 (5.54) | Data Time: 4.71 (4.71) | Mem (GB): 28.00 (28.00/28.00) | Time Elapsed: 00d 00h 11m | Losses/train_all_loss: 9.48e-02 (9.48e-02)




INFO 2025-02-12 19:17:14,280 train_utils.py: 271: Train Epoch: [13][10/46] | Batch Time: 0.78 (1.41) | Data Time: 0.00 (0.46) | Mem (GB): 28.00 (28.45/29.00) | Time Elapsed: 00d 00h 11m | Losses/train_all_loss: 2.85e-01 (9.72e-01)
INFO 2025-02-12 19:17:23,357 train_utils.py: 271: Train Epoch: [13][20/46] | Batch Time: 1.15 (1.17) | Data Time: 0.00 (0.24) | Mem (GB): 30.00 (28.52/30.00) | Time Elapsed: 00d 00h 11m | Losses/train_all_loss: 8.23e-01 (1.16e+00)
INFO 2025-02-12 19:17:32,451 train_utils.py: 271: Train Epoch: [13][30/46] | Batch Time: 0.80 (1.09) | Data Time: 0.00 (0.16) | Mem (GB): 28.00 (28.52/30.00) | Time Elapsed: 00d 00h 11m | Losses/train_all_loss: 1.76e-01 (1.08e+00)
INFO 2025-02-12 19:17:42,242 train_utils.py: 271: Train Epoch: [13][40/46] | Batch Time: 0.77 (1.06) | Data Time: 0.00 (0.12) | Mem (GB): 28.00 (28.63/30.00) | Time Elapsed: 00d 00h 11m | Losses/train_all_loss: 2.24e-01 (1.08e+00)
INFO 2025-02-12 19:17:48,436 trainer.py: 950: Estimated time remaining: 00d 



INFO 2025-02-12 19:17:56,868 train_utils.py: 271: Train Epoch: [14][ 0/46] | Batch Time: 7.16 (7.16) | Data Time: 5.93 (5.93) | Mem (GB): 30.00 (30.00/30.00) | Time Elapsed: 00d 00h 12m | Losses/train_all_loss: 1.39e+00 (1.39e+00)
INFO 2025-02-12 19:18:06,136 train_utils.py: 271: Train Epoch: [14][10/46] | Batch Time: 0.81 (1.49) | Data Time: 0.00 (0.54) | Mem (GB): 28.00 (28.64/30.00) | Time Elapsed: 00d 00h 12m | Losses/train_all_loss: 2.20e-01 (1.18e+00)




INFO 2025-02-12 19:18:15,879 train_utils.py: 271: Train Epoch: [14][20/46] | Batch Time: 0.80 (1.25) | Data Time: 0.00 (0.28) | Mem (GB): 28.00 (28.62/30.00) | Time Elapsed: 00d 00h 12m | Losses/train_all_loss: 7.14e-02 (1.02e+00)
INFO 2025-02-12 19:18:25,213 train_utils.py: 271: Train Epoch: [14][30/46] | Batch Time: 0.80 (1.15) | Data Time: 0.00 (0.19) | Mem (GB): 28.00 (28.58/30.00) | Time Elapsed: 00d 00h 12m | Losses/train_all_loss: 4.83e-01 (1.00e+00)
INFO 2025-02-12 19:18:34,537 train_utils.py: 271: Train Epoch: [14][40/46] | Batch Time: 1.13 (1.09) | Data Time: 0.00 (0.15) | Mem (GB): 30.00 (28.59/30.00) | Time Elapsed: 00d 00h 12m | Losses/train_all_loss: 5.89e-01 (1.10e+00)
INFO 2025-02-12 19:18:40,248 trainer.py: 950: Estimated time remaining: 00d 00h 04m
INFO 2025-02-12 19:18:40,251 trainer.py: 892: Synchronizing meters
INFO 2025-02-12 19:18:40,251 trainer.py: 830: Losses and meters: {'Losses/train_all_loss': 1.0234140995889902, 'Losses/train_all_loss_mask': 0.0054207502158



INFO 2025-02-12 19:18:48,155 train_utils.py: 271: Train Epoch: [15][ 0/46] | Batch Time: 6.56 (6.56) | Data Time: 5.75 (5.75) | Mem (GB): 28.00 (28.00/28.00) | Time Elapsed: 00d 00h 12m | Losses/train_all_loss: 1.27e+00 (1.27e+00)
INFO 2025-02-12 19:18:57,199 train_utils.py: 271: Train Epoch: [15][10/46] | Batch Time: 1.11 (1.42) | Data Time: 0.00 (0.52) | Mem (GB): 29.00 (28.45/29.00) | Time Elapsed: 00d 00h 13m | Losses/train_all_loss: 1.46e+00 (7.66e-01)




INFO 2025-02-12 19:19:06,053 train_utils.py: 271: Train Epoch: [15][20/46] | Batch Time: 0.77 (1.16) | Data Time: 0.00 (0.27) | Mem (GB): 28.00 (28.48/30.00) | Time Elapsed: 00d 00h 13m | Losses/train_all_loss: 8.58e-02 (7.77e-01)




INFO 2025-02-12 19:19:15,457 train_utils.py: 271: Train Epoch: [15][30/46] | Batch Time: 0.80 (1.09) | Data Time: 0.00 (0.19) | Mem (GB): 28.00 (28.55/30.00) | Time Elapsed: 00d 00h 13m | Losses/train_all_loss: 2.54e-01 (7.73e-01)
INFO 2025-02-12 19:19:24,905 train_utils.py: 271: Train Epoch: [15][40/46] | Batch Time: 0.79 (1.06) | Data Time: 0.00 (0.14) | Mem (GB): 28.00 (28.59/30.00) | Time Elapsed: 00d 00h 13m | Losses/train_all_loss: 1.56e-01 (7.50e-01)
INFO 2025-02-12 19:19:30,691 trainer.py: 950: Estimated time remaining: 00d 00h 03m
INFO 2025-02-12 19:19:30,694 trainer.py: 892: Synchronizing meters
INFO 2025-02-12 19:19:30,695 trainer.py: 830: Losses and meters: {'Losses/train_all_loss': 0.7267211427831132, 'Losses/train_all_loss_mask': 0.007178973572318564, 'Losses/train_all_loss_dice': 0.3941910409409067, 'Losses/train_all_loss_iou': 0.15980009955313543, 'Losses/train_all_loss_class': 0.029150538045254602, 'Losses/train_all_core_loss': 0.7267211427831132, 'Trainer/where': 0.79



INFO 2025-02-12 19:19:37,749 train_utils.py: 271: Train Epoch: [16][ 0/46] | Batch Time: 5.77 (5.77) | Data Time: 4.54 (4.54) | Mem (GB): 29.00 (29.00/29.00) | Time Elapsed: 00d 00h 13m | Losses/train_all_loss: 2.97e+00 (2.97e+00)




INFO 2025-02-12 19:19:47,131 train_utils.py: 271: Train Epoch: [16][10/46] | Batch Time: 0.77 (1.38) | Data Time: 0.00 (0.41) | Mem (GB): 28.00 (28.64/30.00) | Time Elapsed: 00d 00h 13m | Losses/train_all_loss: 3.83e-01 (1.20e+00)




INFO 2025-02-12 19:19:55,955 train_utils.py: 271: Train Epoch: [16][20/46] | Batch Time: 0.86 (1.14) | Data Time: 0.00 (0.22) | Mem (GB): 28.00 (28.48/30.00) | Time Elapsed: 00d 00h 14m | Losses/train_all_loss: 1.80e-01 (8.94e-01)
INFO 2025-02-12 19:20:04,985 train_utils.py: 271: Train Epoch: [16][30/46] | Batch Time: 1.15 (1.06) | Data Time: 0.00 (0.15) | Mem (GB): 30.00 (28.52/30.00) | Time Elapsed: 00d 00h 14m | Losses/train_all_loss: 1.03e+00 (7.87e-01)
INFO 2025-02-12 19:20:14,313 train_utils.py: 271: Train Epoch: [16][40/46] | Batch Time: 0.85 (1.03) | Data Time: 0.00 (0.11) | Mem (GB): 28.00 (28.54/30.00) | Time Elapsed: 00d 00h 14m | Losses/train_all_loss: 7.36e-02 (7.48e-01)
INFO 2025-02-12 19:20:20,250 trainer.py: 950: Estimated time remaining: 00d 00h 02m
INFO 2025-02-12 19:20:20,252 trainer.py: 892: Synchronizing meters
INFO 2025-02-12 19:20:20,253 trainer.py: 830: Losses and meters: {'Losses/train_all_loss': 0.7119199207619481, 'Losses/train_all_loss_mask': 0.0055693212077



INFO 2025-02-12 19:20:37,480 train_utils.py: 271: Train Epoch: [17][10/46] | Batch Time: 0.80 (1.43) | Data Time: 0.00 (0.49) | Mem (GB): 28.00 (28.45/30.00) | Time Elapsed: 00d 00h 14m | Losses/train_all_loss: 4.30e-01 (5.70e-01)




INFO 2025-02-12 19:20:46,715 train_utils.py: 271: Train Epoch: [17][20/46] | Batch Time: 0.79 (1.19) | Data Time: 0.00 (0.26) | Mem (GB): 28.00 (28.57/30.00) | Time Elapsed: 00d 00h 14m | Losses/train_all_loss: 1.25e-01 (5.75e-01)
INFO 2025-02-12 19:20:56,966 train_utils.py: 271: Train Epoch: [17][30/46] | Batch Time: 0.99 (1.14) | Data Time: 0.00 (0.18) | Mem (GB): 29.00 (28.68/30.00) | Time Elapsed: 00d 00h 15m | Losses/train_all_loss: 6.05e-01 (6.59e-01)
INFO 2025-02-12 19:21:06,228 train_utils.py: 271: Train Epoch: [17][40/46] | Batch Time: 0.81 (1.09) | Data Time: 0.00 (0.13) | Mem (GB): 28.00 (28.63/30.00) | Time Elapsed: 00d 00h 15m | Losses/train_all_loss: 1.63e-01 (6.14e-01)
INFO 2025-02-12 19:21:11,671 trainer.py: 950: Estimated time remaining: 00d 00h 01m
INFO 2025-02-12 19:21:11,673 trainer.py: 892: Synchronizing meters
INFO 2025-02-12 19:21:11,674 trainer.py: 830: Losses and meters: {'Losses/train_all_loss': 0.6726862106310285, 'Losses/train_all_loss_mask': 0.0064911426686



INFO 2025-02-12 19:21:20,022 train_utils.py: 271: Train Epoch: [18][ 0/46] | Batch Time: 6.94 (6.94) | Data Time: 5.75 (5.75) | Mem (GB): 30.00 (30.00/30.00) | Time Elapsed: 00d 00h 15m | Losses/train_all_loss: 8.10e-01 (8.10e-01)




INFO 2025-02-12 19:21:29,152 train_utils.py: 271: Train Epoch: [18][10/46] | Batch Time: 0.78 (1.46) | Data Time: 0.00 (0.52) | Mem (GB): 28.00 (28.82/30.00) | Time Elapsed: 00d 00h 15m | Losses/train_all_loss: 5.20e-02 (8.76e-01)
INFO 2025-02-12 19:21:39,246 train_utils.py: 271: Train Epoch: [18][20/46] | Batch Time: 1.17 (1.25) | Data Time: 0.00 (0.27) | Mem (GB): 30.00 (28.95/30.00) | Time Elapsed: 00d 00h 15m | Losses/train_all_loss: 4.24e-01 (1.10e+00)
INFO 2025-02-12 19:21:48,205 train_utils.py: 271: Train Epoch: [18][30/46] | Batch Time: 0.80 (1.13) | Data Time: 0.00 (0.19) | Mem (GB): 28.00 (28.77/30.00) | Time Elapsed: 00d 00h 15m | Losses/train_all_loss: 7.33e-02 (8.95e-01)
INFO 2025-02-12 19:21:58,114 train_utils.py: 271: Train Epoch: [18][40/46] | Batch Time: 0.80 (1.10) | Data Time: 0.00 (0.14) | Mem (GB): 28.00 (28.78/30.00) | Time Elapsed: 00d 00h 16m | Losses/train_all_loss: 3.47e-01 (9.73e-01)
INFO 2025-02-12 19:22:03,798 trainer.py: 950: Estimated time remaining: 00d 



INFO 2025-02-12 19:22:10,938 train_utils.py: 271: Train Epoch: [19][ 0/46] | Batch Time: 5.80 (5.80) | Data Time: 4.97 (4.97) | Mem (GB): 28.00 (28.00/28.00) | Time Elapsed: 00d 00h 16m | Losses/train_all_loss: 2.22e-01 (2.22e-01)




INFO 2025-02-12 19:22:21,498 train_utils.py: 271: Train Epoch: [19][10/46] | Batch Time: 0.79 (1.49) | Data Time: 0.00 (0.45) | Mem (GB): 28.00 (29.00/30.00) | Time Elapsed: 00d 00h 16m | Losses/train_all_loss: 2.25e-01 (1.64e+00)
INFO 2025-02-12 19:22:30,323 train_utils.py: 271: Train Epoch: [19][20/46] | Batch Time: 0.77 (1.20) | Data Time: 0.00 (0.24) | Mem (GB): 28.00 (28.71/30.00) | Time Elapsed: 00d 00h 16m | Losses/train_all_loss: 5.51e-01 (1.32e+00)
INFO 2025-02-12 19:22:39,577 train_utils.py: 271: Train Epoch: [19][30/46] | Batch Time: 1.13 (1.11) | Data Time: 0.00 (0.16) | Mem (GB): 30.00 (28.71/30.00) | Time Elapsed: 00d 00h 16m | Losses/train_all_loss: 7.91e-01 (1.27e+00)
INFO 2025-02-12 19:22:48,565 train_utils.py: 271: Train Epoch: [19][40/46] | Batch Time: 0.77 (1.06) | Data Time: 0.00 (0.12) | Mem (GB): 28.00 (28.68/30.00) | Time Elapsed: 00d 00h 16m | Losses/train_all_loss: 3.85e-02 (1.18e+00)
INFO 2025-02-12 19:22:54,750 trainer.py: 950: Estimated time remaining: 00d 

In [12]:
# Completion marker: this prints only after every earlier cell has finished.
print("Finito")

Finito
