# Dual Domain Training for 3D Datasets
This notebook is an upgraded version of the existing Training_3D.ipynb. The images reconstructed by models trained with the normal training seem to be very noisy; the dual domain training can hopefully remove the noise better. The structure of the network stays the same, but Ray and the torch distributed functions are used to run the training twice on two different GPUs. Furthermore, the ispace loss is used to weight the difference between the kspace data of the two models, and their gradients are averaged before the optimizer step. In this way both trainings run separately, but the optimizer steps they take are exactly the same, because the gradients used during the optimizer step are the average of the two calculated gradients. Setting weight_ispace_loss to 0 results in the normal training again (except for the fact that two models are trained but only one is saved at the end).

In [None]:
# Import of all necessary functions and classes
# NOTE(review): several names imported below (nn, optim, DataLoader,
# TensorDataset, Tuple, zarr, fourier_transform_adjoint/forward) are not used
# in the code visible in this cell -- confirm whether other cells need them.
import os
import torch
import gc
import time
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from ray.train.torch import TorchTrainer
from ray.train import ScalingConfig
from ray.air.config import RunConfig
from typing import Tuple
import numpy as np
import torch.distributed as dist
import ray
import h5py
import zarr as z

# Restrict the visible CUDA devices to the physical GPUs 2 and 3.
# NOTE(review): presumably this has to happen before CUDA is initialised and
# before Ray starts its workers -- confirm before moving this line.
os.environ["CUDA_VISIBLE_DEVICES"] = "2,3"

# Put the repository's src directory at the front of the module search path so
# that the juart imports below resolve to the in-repo sources.
import sys
sys.path.insert(0, "../../src")
from juart.dl.loss.loss import JointLoss
from juart.dl.operation.modules import training
from juart.dl.utils.dist import GradientAccumulator
from juart.dl.model.unrollnet import LookaheadModel, UnrolledNet
from juart.dl.checkpoint.manager import CheckpointManager
from juart.conopt.functional.fourier import (
    fourier_transform_adjoint,
    fourier_transform_forward,
    nonuniform_fourier_transform_adjoint,
)

# activates the terminal output for print commands in ray
# (sets the root logger to INFO level)
import logging
logging.basicConfig(level=logging.INFO)

# Training function that is later passed to the TorchTrainer
def train_func():
    """Run the dual-domain training loop on one distributed worker.

    The function is meant to be executed by every worker of a Ray
    ``TorchTrainer`` after the default torch process group has been set up.
    Each worker

    1. joins a gloo process group of ``group_size`` consecutive ranks,
    2. reads trajectory, coil sensitivities and signal from the HDF5 file,
    3. builds the network, loss, optimizer, gradient accumulator and
       averaged model,
    4. restores model/optimizer/metric state from the checkpoint manager when
       available, and
    5. iterates until ``num_iterations`` is reached. In every iteration a
       random binary k-space mask is drawn; rank 0 regrids the data selected
       by the mask, rank 1 the data selected by its complement, and both call
       ``training`` with the shared process group.

    Rank 0 writes a tagged checkpoint at the end of every epoch and an
    untagged one every ``checkpoint_frequency`` iterations.

    Returns:
        dict: ``{"model": model.parameters()}`` for the local model.

    Raises:
        RuntimeError: If the worker's global rank is neither 0 nor 1, because
            input data is only defined for these two ranks.
    """
    # define variables
    shape = (156, 156, 156, 2, 1)
    nX, nY, nZ = shape[:3]
    weight_kspace_loss = [0.5, 0.5]  # weight the difference in k space
    # weight the difference of the two images (dual domain) and average
    # their gradients
    weight_ispace_loss = [0.1, 0.1]
    weight_hankel_loss = [0.0, 0.0]
    weight_casorati_loss = [0.0, 0.0]
    weight_wavelet_loss = [0.0, 0.0]  # weight the loss in wavelet domain
    normalized_loss = True

    batch_size = 1  # number of datapoints used per batch iteration
    nD = 1  # number of datasets
    nP = 25  # number of permutations per epoch
    cgiter = 50  # number of dc iterations
    num_epochs = 20  # number of epochs

    global_rank = int(dist.get_rank())
    world_size = int(dist.get_world_size())
    group_size = 2
    model_dir = f'corr_modelDD_01i_{nP}P_{cgiter}DC'
    root_dir = "/home/jovyan/models"
    endpoint_url = "https://s3.fz-juelich.de"
    model_backend = 'local'

    single_epoch = False  # if its true the script will stop after 1 epoch
    save_checkpoint = True  # enables checkpoint saving
    checkpoint_frequency = 5  # number of iterations between the save files
    load_model_state = True  # if true the latest model state will be loaded if available
    load_averaged_model_state = True  # latest averaged model state will be loaded
    load_optim_state = True  # latest optimizer state will be loaded
    load_metrics = True  # the latest metrics (loss, iterations) will be loaded

    num_groups = 1
    batch_size_local = batch_size // num_groups
    num_iterations = nD * nP * num_epochs

    ################################################################
    # Setting the rank for each worker
    device = f"cuda:{global_rank}"
    for rank in range(0, world_size, group_size):
        ranks = list(range(rank, rank + group_size))
        # torch.distributed requires *every* process to call new_group for
        # every group, including the groups it is not a member of.
        candidate_group = dist.new_group(ranks, backend="gloo")
        if global_rank in ranks:
            print(f"Rank {global_rank} is in group {ranks} ...")
            group = candidate_group

    ################################################################
    # reading and shaping data
    data_path = "/home/jovyan/juart/examples/data/3DLiss_vd_preproc.h5"
    with h5py.File(data_path, "r") as f:
        # A trailing singleton axis is appended to trajectory and signal.
        k = torch.from_numpy(f['k'][:])[..., None]
        C = torch.from_numpy(f['coilsens'][:])
        d = torch.from_numpy(f['d'][:])[..., None]

        print(f"Coilsensitivity shape {C.shape}")
        print(f"Trajectory shape {k.shape}")
        print(f"Signal shape {d.shape}")

    # Rescale the trajectory so that its maximum becomes 0.5.
    k /= (2 * k.max())

    ################################################################
    # Defining the neural network
    model = UnrolledNet(
        shape,
        CG_Iter=cgiter,
        num_unroll_blocks=10,
        num_res_blocks=15,
        features=32,
        axes=(1, 2, 3),
        kernel_size=(3, 3, 3),
        activation='ReLU',
        ResNetCheckpoints=True,
    ).to(device)

    loss_fn = JointLoss(
        shape,
        (3, 3),
        weights_kspace_loss=weight_kspace_loss,
        weights_ispace_loss=weight_ispace_loss,
        weights_hankel_loss=weight_hankel_loss,
        weights_casorati_loss=weight_casorati_loss,
        weights_wavelet_loss=weight_wavelet_loss,
        normalized_loss=normalized_loss,
        group=group,
        device=device,
    )

    optimizer = torch.optim.Adam(
        model.parameters(),
        lr=0.0001,
        betas=[0.9, 0.999],
        eps=1.0e-8,
        weight_decay=0.0,
    )

    accumulator = GradientAccumulator(
        model,
        accumulation_steps=batch_size_local,
        max_norm=1.0,
        normalized_gradient=False,
    )

    # NOTE(review): averaged_model is only loaded and saved in this function;
    # no update call is visible here -- confirm whether ``training`` or the
    # wrapper itself is expected to advance it.
    averaged_model = LookaheadModel(
        model,
        alpha=0.5,
        k=5,
    )

    dist.barrier()

    checkpoint_manager = CheckpointManager(
        model_dir,
        root_dir=root_dir,
        endpoint_url=endpoint_url,
        backend=model_backend,
    )

    dist.barrier()

    ################################################################
    # LOADING CURRENT MODEL STATE
    if load_model_state:
        print(f"Rank {global_rank} - Loading model state ...")
        checkpoint = checkpoint_manager.load(["model_state"], map_location=device)
        if all(checkpoint.values()):
            model.load_state_dict(checkpoint["model_state"])
        else:
            print(f"Rank {global_rank} - Could not load model state.")

    if load_averaged_model_state:
        print(f"Rank {global_rank} - Loading averaged model state ...")
        checkpoint = checkpoint_manager.load(
            ["averaged_model_state"], map_location=device
        )
        if all(checkpoint.values()):
            averaged_model.load_state_dict(checkpoint["averaged_model_state"])
        else:
            print(f"Rank {global_rank} - Could not load averaged model state.")

    if load_optim_state:
        print(f"Rank {global_rank} - Loading optim state ...")
        checkpoint = checkpoint_manager.load(["optim_state"], map_location=device)
        if all(checkpoint.values()):
            optimizer.load_state_dict(checkpoint["optim_state"])
        else:
            print(f"Rank {global_rank} - Could not load optim state.")

    # Defaults for a fresh run. They must be set regardless of
    # load_optim_state, otherwise ``iteration`` can be undefined below.
    total_trn_loss = list()
    total_val_loss = list()
    iteration = 0

    if load_metrics:
        print(f"Rank {global_rank} - Loading metrics ...")
        checkpoint = checkpoint_manager.load(["trn_loss", "val_loss", "iteration"])
        if all(checkpoint.values()):
            total_trn_loss = list(checkpoint["trn_loss"])
            total_val_loss = list(checkpoint["val_loss"])
            # The iteration counter seeds the mask generator, so keep it a
            # plain Python int.
            iteration = int(checkpoint["iteration"])
        else:
            print(f"Rank {global_rank} - Could not load metrics.")

    print(f"Rank {global_rank} - Continue with iteration {iteration} ...")

    dist.barrier()

    ################################################################
    # ACTUAL TRAINING LOOP
    # The metrics restored above are deliberately NOT reset here, so that a
    # restarted run continues from the last checkpoint.
    generator = torch.Generator()

    while iteration < num_iterations:
        tic = time.time()
        # Same seed on every rank and in every epoch for a given permutation
        # index, so both workers draw the identical mask.
        generator.manual_seed(iteration % nP)

        kspace_mask_worker0 = torch.randint(
            0, 2, (1, d.shape[1], 2, 1), generator=generator
        )
        kspace_mask_worker1 = 1 - kspace_mask_worker0

        # Each worker regrids the samples selected by its own mask; the
        # complementary mask is passed along as "kspace_mask_source".
        if global_rank == 0:
            own_mask, other_mask = kspace_mask_worker0, kspace_mask_worker1
        elif global_rank == 1:
            own_mask, other_mask = kspace_mask_worker1, kspace_mask_worker0
        else:
            raise RuntimeError(
                f"Rank {global_rank} - dual domain training defines data "
                "only for ranks 0 and 1."
            )

        d_masked = d * own_mask
        AHd = nonuniform_fourier_transform_adjoint(k, d_masked, (nX, nY, nZ))
        # Combine the coil images, weighted by the conjugate sensitivities.
        AHd = torch.sum(torch.conj(C[..., None, None]) * AHd, dim=0)

        data = [
            {
                "images_regridded": AHd,
                "kspace_trajectory": k,
                "sensitivity_maps": C,
                "kspace_mask_source": other_mask,
                "kspace_mask_target": own_mask,
                "kspace_data": d,
            }
        ]

        trn_loss = training(
            [0],
            data,
            model,
            loss_fn,
            optimizer,
            accumulator,
            group=group,
            device=device,
        )

        # No validation is performed; zeros are recorded as placeholders.
        val_loss = [0] * batch_size
        total_trn_loss.append(trn_loss)
        # Keep the validation history in step with the training history. An
        # empty list would be saved otherwise, which is falsy under the
        # ``all(checkpoint.values())`` check used when loading metrics.
        total_val_loss.append(val_loss)

        ################################################################
        # SAVING DATA
        completed = iteration + batch_size
        epoch_completed = np.mod(completed, nD * nP) == 0
        frequency_reached = np.mod(completed, checkpoint_frequency) == 0

        if global_rank == 0:
            if save_checkpoint and (epoch_completed or frequency_reached):
                checkpoint = {
                    "iteration": completed,
                    "model_state": model.state_dict(),
                    "averaged_model_state": averaged_model.state_dict(),
                    "optim_state": optimizer.state_dict(),
                    "trn_loss": total_trn_loss,
                    "val_loss": total_val_loss,
                }

                if epoch_completed:
                    # Completed epoch
                    print("Creating tagged checkpoint ...")
                    epoch = completed // (nD * nP)
                    checkpoint_manager.save(checkpoint, tag=f"_epoch_{epoch}")

                    if single_epoch:
                        # Also save the checkpoint as untagged checkpoint
                        # Otherwise, training will be stuck in endless loop
                        checkpoint_manager.save(checkpoint)
                        checkpoint_manager.release()
                else:
                    # Intermediate checkpoint
                    print("Creating untagged checkpoint ...")
                    checkpoint_manager.save(checkpoint, block=False)

            toc = time.time() - tic

            print(
                (
                    f"Iteration: {iteration} - "
                    + f"Elapsed time: {toc:.0f} - "
                    + f"Training loss: {[f'{loss:.3f}' for loss in trn_loss]} - "
                    + f"Validation loss: {[f'{loss:.3f}' for loss in val_loss]}"
                )
            )

        torch.cuda.empty_cache()
        gc.collect()

        # Leave the loop on ALL ranks. If only rank 0 stopped, the other
        # rank would enter the next ``training`` call alone.
        if single_epoch and save_checkpoint and epoch_completed:
            break

        iteration += batch_size

    # Return the trained model
    return {"model": model.parameters()}

################################################################
# main function that initializes needed classes and runs the train function
def main():
    """Start Ray and execute ``train_func`` on two GPU workers.

    Ray is initialised with the juart source tree as working directory, a
    ``TorchTrainer`` is configured for two workers (24 CPUs and 1 GPU each),
    and the training is run to completion.
    """
    ray.init(runtime_env={"working_dir": "/home/jovyan/juart/src"})

    trainer = TorchTrainer(
        train_func,
        # Two workers, each with its own GPU and 24 CPUs.
        scaling_config=ScalingConfig(
            num_workers=2,
            use_gpu=True,
            resources_per_worker={"CPU": 24, "GPU": 1},
        ),
        # Name of the run and verbosity of the progress output.
        run_config=RunConfig(
            name="torch_trainer_example",
            verbose=1,
        ),
    )

    # Blocks until train_func has finished on all workers.
    trainer.fit()
    print("Training complete!")


if __name__ == "__main__":
    main()


2025-09-29 11:57:49,010	INFO worker.py:1942 -- Started a local Ray instance. View the dashboard at [1m[32mhttp://127.0.0.1:8265 [39m[22m
2025-09-29 11:57:49,109	INFO packaging.py:588 -- Creating a file package for local module '/home/jovyan/juart/src'.
2025-09-29 11:57:49,382	INFO packaging.py:380 -- Pushing file package 'gcs://_ray_pkg_bf6b43a98c80cd6e.zip' (70.13MiB) to Ray cluster...
2025-09-29 11:57:49,864	INFO packaging.py:393 -- Successfully pushed file package 'gcs://_ray_pkg_bf6b43a98c80cd6e.zip'.
2025-09-29 11:57:55,028	INFO tune.py:616 -- [output] This uses the legacy output and progress reporter, as Jupyter notebooks are not supported by the new engine, yet. For more information, please see https://github.com/ray-project/ray/issues/36949
2025-09-29 11:57:55,032	INFO tensorboardx.py:193 -- pip install "ray[tune]" to see TensorBoard files.


== Status ==
Current time: 2025-09-29 11:57:55 (running for 00:00:00.13)
Using FIFO scheduling algorithm.
Logical resource usage: 0/128 CPUs, 0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 PENDING)


== Status ==
Current time: 2025-09-29 11:58:00 (running for 00:00:05.19)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 PENDING)






== Status ==
Current time: 2025-09-29 11:58:05 (running for 00:00:10.20)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




[36m(RayTrainWorker pid=569511)[0m Setting up process group for: env:// [rank=0, world_size=2]
[36m(TorchTrainer pid=566798)[0m Started distributed worker processes: 
[36m(TorchTrainer pid=566798)[0m - (node_id=cfa7bc5017388458110899e2b51673e25920f4d8be2bb9c837561354, ip=10.1.64.190, pid=569511) world_rank=0, local_rank=0, node_rank=0
[36m(TorchTrainer pid=566798)[0m - (node_id=cfa7bc5017388458110899e2b51673e25920f4d8be2bb9c837561354, ip=10.1.64.190, pid=569510) world_rank=1, local_rank=1, node_rank=0


[36m(RayTrainWorker pid=569510)[0m Rank 1 is in group [0, 1] ...
[36m(RayTrainWorker pid=569510)[0m Coilsensitivity shape torch.Size([8, 156, 156, 156])
[36m(RayTrainWorker pid=569510)[0m Trajectory shape torch.Size([3, 2001191, 2, 1])
[36m(RayTrainWorker pid=569510)[0m Signal shape torch.Size([8, 2001191, 2, 1])
== Status ==
Current time: 2025-09-29 11:58:10 (running for 00:00:15.22)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




[36m(RayTrainWorker pid=569510)[0m [rank1]:[W929 11:58:12.251824589 ProcessGroupNCCL.cpp:4718] [PG ID 0 PG GUID 0 Rank 1]  using GPU 1 as device used by this process is currently unknown. This can potentially cause a hang if this rank to GPU mapping is incorrect. You can pecify device_id in init_process_group() to force use of a particular device.


[36m(RayTrainWorker pid=569510)[0m Rank 1 - Loading model state ...
[36m(RayTrainWorker pid=569510)[0m Rank 1 - Could not load model state.
[36m(RayTrainWorker pid=569510)[0m Rank 1 - Loading averaged model state ...
[36m(RayTrainWorker pid=569510)[0m Rank 1 - Could not load averaged model state.
[36m(RayTrainWorker pid=569510)[0m Rank 1 - Loading optim state ...
[36m(RayTrainWorker pid=569510)[0m Rank 1 - Could not load optim state.
[36m(RayTrainWorker pid=569510)[0m Rank 1 - Loading metrics ...
[36m(RayTrainWorker pid=569510)[0m Rank 1 - Could not load metrics.
[36m(RayTrainWorker pid=569510)[0m Rank 1 - Continue with iteration 0 ...
== Status ==
Current time: 2025-09-29 11:58:15 (running for 00:00:20.24)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trial

[36m(RayTrainWorker pid=569511)[0m [rank0]:[W929 11:58:12.249138466 ProcessGroupNCCL.cpp:4718] [PG ID 0 PG GUID 0 Rank 0]  using GPU 0 as device used by this process is currently unknown. This can potentially cause a hang if this rank to GPU mapping is incorrect. You can pecify device_id in init_process_group() to force use of a particular device.


== Status ==
Current time: 2025-09-29 11:58:25 (running for 00:00:30.29)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




  0%|                                                   | 0/10 [00:00<?, ?it/s]


== Status ==
Current time: 2025-09-29 11:58:30 (running for 00:00:35.31)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 11:58:35 (running for 00:00:40.33)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 10%|████▎                                      | 1/10 [00:10<01:34, 10.47s/it]
  0%|                                                   | 0/10 [00:00<?, ?it/s]


== Status ==
Current time: 2025-09-29 11:58:40 (running for 00:00:45.35)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 11:58:45 (running for 00:00:50.37)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 20%|████████▌                                  | 2/10 [00:20<01:23, 10.39s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 11:58:50 (running for 00:00:55.39)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 11:58:55 (running for 00:01:00.41)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 30%|████████████▉                              | 3/10 [00:31<01:12, 10.37s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 11:59:00 (running for 00:01:05.42)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 11:59:05 (running for 00:01:10.44)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 40%|█████████████████▏                         | 4/10 [00:41<01:02, 10.36s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 11:59:10 (running for 00:01:15.46)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 11:59:15 (running for 00:01:20.48)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 50%|█████████████████████▌                     | 5/10 [00:51<00:51, 10.37s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 11:59:20 (running for 00:01:25.50)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 11:59:25 (running for 00:01:30.52)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 60%|█████████████████████████▊                 | 6/10 [01:02<00:41, 10.38s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 11:59:30 (running for 00:01:35.53)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 11:59:35 (running for 00:01:40.56)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 11:59:40 (running for 00:01:45.57)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-5

 70%|██████████████████████████████             | 7/10 [01:12<00:31, 10.39s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 11:59:45 (running for 00:01:50.59)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 11:59:50 (running for 00:01:55.61)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 80%|██████████████████████████████████▍        | 8/10 [01:23<00:20, 10.39s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 11:59:55 (running for 00:02:00.63)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:00:00 (running for 00:02:05.65)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 90%|██████████████████████████████████████▋    | 9/10 [01:33<00:10, 10.40s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 12:00:05 (running for 00:02:10.67)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:00:10 (running for 00:02:15.69)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


[36m(RayTrainWorker pid=569510)[0m Rank 1 - model initialization done -> loss fn initialization
[36m(RayTrainWorker pid=569511)[0m Rank 0 - reading data
[36m(RayTrainWorker pid=569511)[0m Rank 0 - reading data done -> model initialization


100%|██████████████████████████████████████████| 10/10 [01:43<00:00, 10.39s/it]
 90%|██████████████████████████████████████▋    | 9/10 [01:35<00:10, 10.47s/it]
100%|██████████████████████████████████████████| 10/10 [01:45<00:00, 10.59s/it]


== Status ==
Current time: 2025-09-29 12:00:15 (running for 00:02:20.71)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


[36m(RayTrainWorker pid=569510)[0m [KSpaceLoss] torch.Size([8, 2001191, 2, 1]) torch.Size([8, 2001191, 2, 1]) None
[36m(RayTrainWorker pid=569511)[0m Rank 0 - model initialization done -> loss fn initialization
== Status ==
Current time: 2025-09-29 12:00:20 (running for 00:02:25.73)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


[36m(RayTrainWorker pid=569510)[0m [ImageSpaceLoss] torch.Size([1



== Status ==
Current time: 2025-09-29 12:00:25 (running for 00:02:30.75)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:00:30 (running for 00:02:35.77)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:00:35 (running for 00:02:40.79)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-5

[36m(RayTrainWorker pid=569510)[0m   return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


[36m(RayTrainWorker pid=569510)[0m Rank 1 - Index 0 - Gradient norm: 5.496
[36m(RayTrainWorker pid=569511)[0m Averaged gradient norm: 1.000
[36m(RayTrainWorker pid=569511)[0m Iteration: 0 - Elapsed time: 353 - Training loss: ['1.646'] - Validation loss: ['0.000']
== Status ==
Current time: 2025-09-29 12:04:06 (running for 00:06:11.59)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:04:11 (running for 00:06:16.61)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


[36m(RayTra

[36m(RayTrainWorker pid=569511)[0m   return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


== Status ==
Current time: 2025-09-29 12:04:16 (running for 00:06:21.64)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:04:21 (running for 00:06:26.66)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




  0%|                                                   | 0/10 [00:00<?, ?it/s]


== Status ==
Current time: 2025-09-29 12:04:26 (running for 00:06:31.68)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:04:31 (running for 00:06:36.70)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 10%|████▎                                      | 1/10 [00:10<01:33, 10.38s/it]
  0%|                                                   | 0/10 [00:00<?, ?it/s]


== Status ==
Current time: 2025-09-29 12:04:36 (running for 00:06:41.72)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:04:41 (running for 00:06:46.73)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 20%|████████▌                                  | 2/10 [00:20<01:23, 10.39s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 12:04:46 (running for 00:06:51.76)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:04:51 (running for 00:06:56.77)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 30%|████████████▉                              | 3/10 [00:31<01:12, 10.39s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 12:04:56 (running for 00:07:01.80)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:05:01 (running for 00:07:06.81)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 40%|█████████████████▏                         | 4/10 [00:41<01:02, 10.40s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 12:05:06 (running for 00:07:11.84)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:05:11 (running for 00:07:16.85)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 50%|█████████████████████▌                     | 5/10 [00:51<00:52, 10.41s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 12:05:16 (running for 00:07:21.87)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:05:21 (running for 00:07:26.89)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 60%|█████████████████████████▊                 | 6/10 [01:02<00:41, 10.41s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 12:05:26 (running for 00:07:31.91)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:05:31 (running for 00:07:36.93)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 70%|██████████████████████████████             | 7/10 [01:12<00:31, 10.41s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 12:05:36 (running for 00:07:41.95)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:05:42 (running for 00:07:46.97)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 80%|██████████████████████████████████▍        | 8/10 [01:23<00:20, 10.41s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 12:05:47 (running for 00:07:51.99)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:05:52 (running for 00:07:57.01)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 90%|██████████████████████████████████████▋    | 9/10 [01:33<00:10, 10.41s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 12:05:57 (running for 00:08:02.03)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:06:02 (running for 00:08:07.05)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


[36m(RayTrainWorker pid=569510)[0m Rank 1 - model initialization done -> loss fn initialization
[36m(RayTrainWorker pid=569511)[0m Rank 0 - reading data
[36m(RayTrainWorker pid=569511)[0m Rank 0 - reading data done -> model initialization
== Status ==
Current time: 2025-09-29

100%|██████████████████████████████████████████| 10/10 [01:44<00:00, 10.41s/it]
 90%|██████████████████████████████████████▋    | 9/10 [01:34<00:10, 10.45s/it]
100%|██████████████████████████████████████████| 10/10 [01:44<00:00, 10.45s/it]


== Status ==
Current time: 2025-09-29 12:06:12 (running for 00:08:17.18)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


[36m(RayTrainWorker pid=569510)[0m [KSpaceLoss] torch.Size([8, 2001191, 2, 1]) torch.Size([8, 2001191, 2, 1]) None
[36m(RayTrainWorker pid=569511)[0m Rank 0 - model initialization done -> loss fn initialization
[36m(RayTrainWorker pid=569510)[0m [ImageSpaceLoss] torch.Size([156, 156, 156, 2, 1]) torch.Size([156, 156, 156, 2, 1]) None
[36m(RayTrainWorker pid=569510)[0m Rank 1 - Loss k-space: 1.658 - Loss image space: 0.035 - Loss Wavelet 0.000 - Loss Hankel 0.000 - Loss Casorati 0.000
[36m(RayTrainWorker pid=569510)[0m Rank 1 - loss fn initialization done -> compute backward pass




== Status ==
Current time: 2025-09-29 12:06:17 (running for 00:08:22.20)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:06:22 (running for 00:08:27.22)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:06:27 (running for 00:08:32.25)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-5

[36m(RayTrainWorker pid=569510)[0m   return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


[36m(RayTrainWorker pid=569510)[0m Rank 1 - Index 0 - Gradient norm: 5.421
[36m(RayTrainWorker pid=569511)[0m Averaged gradient norm: 1.000
[36m(RayTrainWorker pid=569511)[0m Iteration: 1 - Elapsed time: 352 - Training loss: ['1.621'] - Validation loss: ['0.000']
== Status ==
Current time: 2025-09-29 12:10:03 (running for 00:12:08.09)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


[36m(RayTrainWorker pid=569510)[0m Rank 1 - reading data
[36m(RayTrainWorker pid=569511)[0m Rank 0 - compute backward pass done -> compute accumulator
[36m(RayTrainWorker pid=569511)[0m Rank 0 - Index 0 - Gradient norm: 6.748
== Status ==
Current time: 2025-09-29 12:10:08 (running for 00:12:13.12)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 

[36m(RayTrainWorker pid=569511)[0m   return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


== Status ==
Current time: 2025-09-29 12:10:13 (running for 00:12:18.14)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




  0%|                                                   | 0/10 [00:00<?, ?it/s]


== Status ==
Current time: 2025-09-29 12:10:18 (running for 00:12:23.16)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:10:23 (running for 00:12:28.18)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 10%|████▎                                      | 1/10 [00:10<01:33, 10.41s/it]
  0%|                                                   | 0/10 [00:00<?, ?it/s]


== Status ==
Current time: 2025-09-29 12:10:28 (running for 00:12:33.20)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:10:33 (running for 00:12:38.22)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 20%|████████▌                                  | 2/10 [00:20<01:23, 10.42s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 12:10:38 (running for 00:12:43.24)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:10:43 (running for 00:12:48.26)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 30%|████████████▉                              | 3/10 [00:31<01:12, 10.42s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 12:10:48 (running for 00:12:53.28)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:10:53 (running for 00:12:58.30)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 40%|█████████████████▏                         | 4/10 [00:41<01:02, 10.42s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 12:10:58 (running for 00:13:03.32)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:11:03 (running for 00:13:08.34)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 50%|█████████████████████▌                     | 5/10 [00:52<00:52, 10.42s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 12:11:08 (running for 00:13:13.36)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:11:13 (running for 00:13:18.38)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:11:18 (running for 00:13:23.40)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-5

 60%|█████████████████████████▊                 | 6/10 [01:02<00:41, 10.43s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 12:11:23 (running for 00:13:28.42)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:11:28 (running for 00:13:33.44)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 70%|██████████████████████████████             | 7/10 [01:12<00:31, 10.43s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 12:11:33 (running for 00:13:38.46)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:11:38 (running for 00:13:43.48)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 80%|██████████████████████████████████▍        | 8/10 [01:23<00:20, 10.43s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 12:11:43 (running for 00:13:48.50)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:11:48 (running for 00:13:53.52)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 90%|██████████████████████████████████████▋    | 9/10 [01:33<00:10, 10.43s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 12:11:53 (running for 00:13:58.53)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:11:58 (running for 00:14:03.55)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


[36m(RayTrainWorker pid=569510)[0m Rank 1 - model initialization done -> loss fn initialization
[36m(RayTrainWorker pid=569511)[0m Rank 0 - reading data
[36m(RayTrainWorker pid=569511)[0m Rank 0 - reading data done -> model initialization


100%|██████████████████████████████████████████| 10/10 [01:44<00:00, 10.43s/it]
 90%|██████████████████████████████████████▋    | 9/10 [01:34<00:10, 10.48s/it]
100%|██████████████████████████████████████████| 10/10 [01:44<00:00, 10.47s/it]


== Status ==
Current time: 2025-09-29 12:12:03 (running for 00:14:08.57)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


[36m(RayTrainWorker pid=569511)[0m [KSpaceLoss] torch.Size([8, 2001191, 2, 1]) torch.Size([8, 2001191, 2, 1]) None
[36m(RayTrainWorker pid=569511)[0m Rank 0 - model initialization done -> loss fn initialization
[36m(RayTrainWorker pid=569511)[0m [ImageSpaceLoss] torch.Size([156, 156, 156, 2, 1]) torch.Size([156, 156, 156, 2, 1]) None
[36m(RayTrainWorker pid=569510)[0m Rank 1 - Loss k-space: 1.619 - Loss image space: 0.037 - Loss Wavelet 0.000 - Loss Hankel 0.000 - Loss Casorati 0.000
[36m(RayTrainWorker pid=569510)[0m Rank 1 - loss fn initialization done -> compute backward pass
== Status ==
Current time: 2025-09-29 12:12:0



== Status ==
Current time: 2025-09-29 12:12:13 (running for 00:14:18.61)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:12:18 (running for 00:14:23.63)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:12:23 (running for 00:14:28.65)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-5

[36m(RayTrainWorker pid=569510)[0m   return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


[36m(RayTrainWorker pid=569510)[0m Rank 1 - Index 0 - Gradient norm: 5.481
[36m(RayTrainWorker pid=569511)[0m Averaged gradient norm: 1.000
[36m(RayTrainWorker pid=569511)[0m Iteration: 2 - Elapsed time: 354 - Training loss: ['1.629'] - Validation loss: ['0.000']
== Status ==
Current time: 2025-09-29 12:15:54 (running for 00:17:59.46)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:15:59 (running for 00:18:04.49)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


[36m(RayTra

[36m(RayTrainWorker pid=569511)[0m   return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


== Status ==
Current time: 2025-09-29 12:16:04 (running for 00:18:09.51)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




  0%|                                                   | 0/10 [00:00<?, ?it/s]


== Status ==
Current time: 2025-09-29 12:16:09 (running for 00:18:14.53)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:16:14 (running for 00:18:19.55)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:16:19 (running for 00:18:24.57)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-5

 10%|████▎                                      | 1/10 [00:10<01:33, 10.39s/it]
  0%|                                                   | 0/10 [00:00<?, ?it/s]


== Status ==
Current time: 2025-09-29 12:16:24 (running for 00:18:29.59)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:16:29 (running for 00:18:34.61)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 20%|████████▌                                  | 2/10 [00:20<01:23, 10.41s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 12:16:34 (running for 00:18:39.62)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:16:39 (running for 00:18:44.65)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 30%|████████████▉                              | 3/10 [00:31<01:12, 10.41s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 12:16:44 (running for 00:18:49.66)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:16:49 (running for 00:18:54.68)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 40%|█████████████████▏                         | 4/10 [00:41<01:02, 10.41s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 12:16:54 (running for 00:18:59.70)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:16:59 (running for 00:19:04.72)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 50%|█████████████████████▌                     | 5/10 [00:52<00:52, 10.42s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 12:17:04 (running for 00:19:09.74)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:17:09 (running for 00:19:14.76)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 60%|█████████████████████████▊                 | 6/10 [01:02<00:41, 10.42s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 12:17:14 (running for 00:19:19.78)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:17:19 (running for 00:19:24.80)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 70%|██████████████████████████████             | 7/10 [01:12<00:31, 10.43s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 12:17:24 (running for 00:19:29.82)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:17:29 (running for 00:19:34.84)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 80%|██████████████████████████████████▍        | 8/10 [01:23<00:20, 10.44s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 12:17:34 (running for 00:19:39.86)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:17:39 (running for 00:19:44.88)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 90%|██████████████████████████████████████▋    | 9/10 [01:33<00:10, 10.45s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 12:17:44 (running for 00:19:49.90)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:17:49 (running for 00:19:54.92)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


[36m(RayTrainWorker pid=569510)[0m Rank 1 - model initialization done -> loss fn initialization
[36m(RayTrainWorker pid=569511)[0m Rank 0 - reading data
[36m(RayTrainWorker pid=569511)[0m Rank 0 - reading data done -> model initialization


100%|██████████████████████████████████████████| 10/10 [01:44<00:00, 10.43s/it]
 90%|██████████████████████████████████████▋    | 9/10 [01:34<00:10, 10.48s/it]
100%|██████████████████████████████████████████| 10/10 [01:44<00:00, 10.47s/it]


== Status ==
Current time: 2025-09-29 12:17:54 (running for 00:19:59.94)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:18:00 (running for 00:20:04.96)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


[36m(RayTrainWorker pid=569510)[0m [KSpaceLoss] torch.Size([8, 2001191, 2, 1]) torch.Size([8, 2001191, 2, 1]) None
[36m(RayTrainWorker pid=569511)[0m Rank 0 - model initialization done -> loss fn initialization
[36m(RayTrainWorker pid=569510)[0m [ImageSpaceLoss] torch.Size([1



== Status ==
Current time: 2025-09-29 12:18:05 (running for 00:20:09.99)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:18:10 (running for 00:20:15.02)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:18:15 (running for 00:20:20.04)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-5

[36m(RayTrainWorker pid=569510)[0m   return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


== Status ==
Current time: 2025-09-29 12:21:45 (running for 00:23:50.85)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


[36m(RayTrainWorker pid=569510)[0m Rank 1 - Index 0 - Gradient norm: 5.272
[36m(RayTrainWorker pid=569511)[0m Averaged gradient norm: 1.000
[36m(RayTrainWorker pid=569511)[0m Iteration: 3 - Elapsed time: 353 - Training loss: ['1.633'] - Validation loss: ['0.000']
== Status ==
Current time: 2025-09-29 12:21:50 (running for 00:23:55.88)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


[36m(RayTra

[36m(RayTrainWorker pid=569511)[0m   return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


== Status ==
Current time: 2025-09-29 12:22:00 (running for 00:24:05.92)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




  0%|                                                   | 0/10 [00:00<?, ?it/s]


== Status ==
Current time: 2025-09-29 12:22:05 (running for 00:24:10.94)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:22:10 (running for 00:24:15.96)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 10%|████▎                                      | 1/10 [00:10<01:33, 10.41s/it]
  0%|                                                   | 0/10 [00:00<?, ?it/s]


== Status ==
Current time: 2025-09-29 12:22:16 (running for 00:24:20.98)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:22:21 (running for 00:24:26.00)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 20%|████████▌                                  | 2/10 [00:20<01:23, 10.41s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 12:22:26 (running for 00:24:31.02)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:22:31 (running for 00:24:36.04)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 30%|████████████▉                              | 3/10 [00:31<01:12, 10.42s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 12:22:36 (running for 00:24:41.06)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:22:41 (running for 00:24:46.08)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 40%|█████████████████▏                         | 4/10 [00:41<01:02, 10.42s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 12:22:46 (running for 00:24:51.10)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:22:51 (running for 00:24:56.12)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 50%|█████████████████████▌                     | 5/10 [00:52<00:52, 10.42s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 12:22:56 (running for 00:25:01.14)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:23:01 (running for 00:25:06.16)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 60%|█████████████████████████▊                 | 6/10 [01:02<00:41, 10.43s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 12:23:06 (running for 00:25:11.18)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:23:11 (running for 00:25:16.20)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 70%|██████████████████████████████             | 7/10 [01:12<00:31, 10.44s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 12:23:16 (running for 00:25:21.22)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:23:21 (running for 00:25:26.24)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:23:26 (running for 00:25:31.26)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-5

 80%|██████████████████████████████████▍        | 8/10 [01:23<00:20, 10.44s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 12:23:31 (running for 00:25:36.28)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:23:36 (running for 00:25:41.30)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 90%|██████████████████████████████████████▋    | 9/10 [01:33<00:10, 10.44s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 12:23:41 (running for 00:25:46.32)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:23:46 (running for 00:25:51.34)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


[36m(RayTrainWorker pid=569510)[0m Rank 1 - model initialization done -> loss fn initialization
[36m(RayTrainWorker pid=569511)[0m Rank 0 - reading data
[36m(RayTrainWorker pid=569511)[0m Rank 0 - reading data done -> model initialization


100%|██████████████████████████████████████████| 10/10 [01:44<00:00, 10.43s/it]
 90%|██████████████████████████████████████▋    | 9/10 [01:34<00:10, 10.48s/it]
100%|██████████████████████████████████████████| 10/10 [01:44<00:00, 10.48s/it]


== Status ==
Current time: 2025-09-29 12:23:51 (running for 00:25:56.36)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


[36m(RayTrainWorker pid=569511)[0m [KSpaceLoss] torch.Size([8, 2001191, 2, 1]) torch.Size([8, 2001191, 2, 1]) None
[36m(RayTrainWorker pid=569511)[0m Rank 0 - model initialization done -> loss fn initialization
[36m(RayTrainWorker pid=569511)[0m [ImageSpaceLoss] torch.Size([156, 156, 156, 2, 1]) torch.Size([156, 156, 156, 2, 1]) None
[36m(RayTrainWorker pid=569510)[0m Rank 1 - Loss k-space: 1.585 - Loss image space: 0.036 - Loss Wavelet 0.000 - Loss Hankel 0.000 - Loss Casorati 0.000
[36m(RayTrainWorker pid=569510)[0m Rank 1 - loss fn initialization done -> compute backward pass




== Status ==
Current time: 2025-09-29 12:23:56 (running for 00:26:01.38)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:24:01 (running for 00:26:06.40)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:24:06 (running for 00:26:11.42)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-5

[36m(RayTrainWorker pid=569510)[0m   return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


[36m(RayTrainWorker pid=569510)[0m Rank 1 - Index 0 - Gradient norm: 5.376
[36m(RayTrainWorker pid=569511)[0m Averaged gradient norm: 1.000
[36m(RayTrainWorker pid=569511)[0m Creating untagged checkpoint ...
[36m(RayTrainWorker pid=569511)[0m 2025-09-29 12:27:39 Schedule checkpoint save with tag:  ...
[36m(RayTrainWorker pid=569511)[0m 2025-09-29 12:27:39 Saved checkpoint to buffer 0.0 seconds
[36m(RayTrainWorker pid=569511)[0m Iteration: 4 - Elapsed time: 353 - Training loss: ['1.616'] - Validation loss: ['0.000']
[36m(RayTrainWorker pid=569511)[0m 2025-09-29 12:27:39 Saved buffer to filesystem in 0.0 seconds
[36m(RayTrainWorker pid=569511)[0m 2025-09-29 12:27:39 Completed saving checkpoint.
== Status ==
Current time: 2025-09-29 12:27:42 (running for 00:29:47.31)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-5

[36m(RayTrainWorker pid=569511)[0m   return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


== Status ==
Current time: 2025-09-29 12:27:52 (running for 00:29:57.36)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:27:57 (running for 00:30:02.38)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




  0%|                                                   | 0/10 [00:00<?, ?it/s]


== Status ==
Current time: 2025-09-29 12:28:02 (running for 00:30:07.40)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:28:07 (running for 00:30:12.42)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 10%|████▎                                      | 1/10 [00:10<01:33, 10.42s/it]
  0%|                                                   | 0/10 [00:00<?, ?it/s]


== Status ==
Current time: 2025-09-29 12:28:12 (running for 00:30:17.44)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:28:17 (running for 00:30:22.46)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 20%|████████▌                                  | 2/10 [00:20<01:23, 10.41s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 12:28:22 (running for 00:30:27.48)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:28:27 (running for 00:30:32.50)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 30%|████████████▉                              | 3/10 [00:31<01:12, 10.42s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 12:28:32 (running for 00:30:37.52)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:28:37 (running for 00:30:42.53)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 40%|█████████████████▏                         | 4/10 [00:41<01:02, 10.42s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 12:28:42 (running for 00:30:47.55)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:28:47 (running for 00:30:52.57)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 50%|█████████████████████▌                     | 5/10 [00:52<00:52, 10.43s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 12:28:52 (running for 00:30:57.59)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:28:57 (running for 00:31:02.61)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 60%|█████████████████████████▊                 | 6/10 [01:02<00:41, 10.43s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 12:29:02 (running for 00:31:07.63)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:29:07 (running for 00:31:12.65)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:29:12 (running for 00:31:17.68)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-5

 70%|██████████████████████████████             | 7/10 [01:12<00:31, 10.43s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 12:29:17 (running for 00:31:22.69)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:29:22 (running for 00:31:27.72)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 80%|██████████████████████████████████▍        | 8/10 [01:23<00:20, 10.43s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 12:29:27 (running for 00:31:32.73)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:29:32 (running for 00:31:37.75)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 90%|██████████████████████████████████████▋    | 9/10 [01:33<00:10, 10.43s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 12:29:37 (running for 00:31:42.77)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:29:42 (running for 00:31:47.80)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


[36m(RayTrainWorker pid=569510)[0m Rank 1 - model initialization done -> loss fn initialization
[36m(RayTrainWorker pid=569511)[0m Rank 0 - reading data
[36m(RayTrainWorker pid=569511)[0m Rank 0 - reading data done -> model initialization


100%|██████████████████████████████████████████| 10/10 [01:44<00:00, 10.43s/it]
 90%|██████████████████████████████████████▋    | 9/10 [01:34<00:10, 10.48s/it]
100%|██████████████████████████████████████████| 10/10 [01:44<00:00, 10.47s/it]


== Status ==
Current time: 2025-09-29 12:29:47 (running for 00:31:52.81)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


[36m(RayTrainWorker pid=569510)[0m [KSpaceLoss] torch.Size([8, 2001191, 2, 1]) torch.Size([8, 2001191, 2, 1]) None
[36m(RayTrainWorker pid=569511)[0m Rank 0 - model initialization done -> loss fn initialization
[36m(RayTrainWorker pid=569510)[0m [ImageSpaceLoss] torch.Size([156, 156, 156, 2, 1]) torch.Size([156, 156, 156, 2, 1]) None
[36m(RayTrainWorker pid=569510)[0m Rank 1 - Loss k-space: 1.675 - Loss image space: 0.036 - Loss Wavelet 0.000 - Loss Hankel 0.000 - Loss Casorati 0.000
[36m(RayTrainWorker pid=569510)[0m Rank 1 - loss fn initialization done -> compute backward pass




== Status ==
Current time: 2025-09-29 12:29:52 (running for 00:31:57.84)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:29:57 (running for 00:32:02.85)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:30:02 (running for 00:32:07.88)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-5

[36m(RayTrainWorker pid=569510)[0m   return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


[36m(RayTrainWorker pid=569510)[0m Rank 1 - Index 0 - Gradient norm: 5.324
[36m(RayTrainWorker pid=569511)[0m Averaged gradient norm: 1.000
[36m(RayTrainWorker pid=569511)[0m Iteration: 5 - Elapsed time: 357 - Training loss: ['1.637'] - Validation loss: ['0.000']
== Status ==
Current time: 2025-09-29 12:33:38 (running for 00:35:43.75)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:33:43 (running for 00:35:48.78)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


[36m(RayTra

[36m(RayTrainWorker pid=569511)[0m   return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


== Status ==
Current time: 2025-09-29 12:33:48 (running for 00:35:53.80)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:33:53 (running for 00:35:58.83)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




  0%|                                                   | 0/10 [00:00<?, ?it/s]


== Status ==
Current time: 2025-09-29 12:33:58 (running for 00:36:03.85)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:34:03 (running for 00:36:08.87)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 10%|████▎                                      | 1/10 [00:10<01:33, 10.42s/it]
  0%|                                                   | 0/10 [00:00<?, ?it/s]


== Status ==
Current time: 2025-09-29 12:34:08 (running for 00:36:13.89)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:34:13 (running for 00:36:18.91)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 20%|████████▌                                  | 2/10 [00:20<01:23, 10.42s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 12:34:18 (running for 00:36:23.93)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:34:23 (running for 00:36:28.95)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 30%|████████████▉                              | 3/10 [00:31<01:12, 10.42s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 12:34:29 (running for 00:36:33.96)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:34:34 (running for 00:36:38.98)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 40%|█████████████████▏                         | 4/10 [00:41<01:02, 10.43s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 12:34:39 (running for 00:36:44.00)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:34:44 (running for 00:36:49.02)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 50%|█████████████████████▌                     | 5/10 [00:52<00:52, 10.43s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 12:34:49 (running for 00:36:54.04)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:34:54 (running for 00:36:59.06)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 60%|█████████████████████████▊                 | 6/10 [01:02<00:41, 10.43s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 12:34:59 (running for 00:37:04.08)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:35:04 (running for 00:37:09.10)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 70%|██████████████████████████████             | 7/10 [01:12<00:31, 10.43s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 12:35:09 (running for 00:37:14.12)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:35:14 (running for 00:37:19.14)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 80%|██████████████████████████████████▍        | 8/10 [01:23<00:20, 10.43s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 12:35:19 (running for 00:37:24.16)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:35:24 (running for 00:37:29.18)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 90%|██████████████████████████████████████▋    | 9/10 [01:33<00:10, 10.44s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 12:35:29 (running for 00:37:34.20)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:35:34 (running for 00:37:39.22)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


[36m(RayTrainWorker pid=569510)[0m Rank 1 - model initialization done -> loss fn initialization
[36m(RayTrainWorker pid=569511)[0m Rank 0 - reading data
[36m(RayTrainWorker pid=569511)[0m Rank 0 - reading data done -> model initialization


100%|██████████████████████████████████████████| 10/10 [01:44<00:00, 10.44s/it]
 90%|██████████████████████████████████████▋    | 9/10 [01:34<00:10, 10.49s/it]
100%|██████████████████████████████████████████| 10/10 [01:44<00:00, 10.48s/it]


== Status ==
Current time: 2025-09-29 12:35:39 (running for 00:37:44.24)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:35:44 (running for 00:37:49.27)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:35:49 (running for 00:37:54.29)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-5



== Status ==
Current time: 2025-09-29 12:35:54 (running for 00:37:59.32)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:35:59 (running for 00:38:04.34)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:36:04 (running for 00:38:09.36)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-5

[36m(RayTrainWorker pid=569511)[0m   return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


[36m(RayTrainWorker pid=569510)[0m Rank 1 - Index 0 - Gradient norm: 5.422
[36m(RayTrainWorker pid=569511)[0m Averaged gradient norm: 1.000
[36m(RayTrainWorker pid=569511)[0m Iteration: 6 - Elapsed time: 361 - Training loss: ['1.638'] - Validation loss: ['0.000']
== Status ==
Current time: 2025-09-29 12:39:40 (running for 00:41:45.51)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:39:45 (running for 00:41:50.53)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


[36m(RayTra

[36m(RayTrainWorker pid=569510)[0m   return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


== Status ==
Current time: 2025-09-29 12:39:50 (running for 00:41:55.55)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




  0%|                                                   | 0/10 [00:00<?, ?it/s]


== Status ==
Current time: 2025-09-29 12:39:55 (running for 00:42:00.58)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:40:00 (running for 00:42:05.59)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 10%|████▎                                      | 1/10 [00:10<01:33, 10.42s/it]
  0%|                                                   | 0/10 [00:00<?, ?it/s]


== Status ==
Current time: 2025-09-29 12:40:05 (running for 00:42:10.62)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:40:10 (running for 00:42:15.63)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:40:15 (running for 00:42:20.66)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-5

 20%|████████▌                                  | 2/10 [00:20<01:23, 10.43s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 12:40:20 (running for 00:42:25.67)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:40:25 (running for 00:42:30.70)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 30%|████████████▉                              | 3/10 [00:31<01:12, 10.42s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 12:40:30 (running for 00:42:35.71)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:40:35 (running for 00:42:40.74)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 40%|█████████████████▏                         | 4/10 [00:41<01:02, 10.42s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 12:40:40 (running for 00:42:45.75)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:40:45 (running for 00:42:50.78)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 50%|█████████████████████▌                     | 5/10 [00:52<00:52, 10.42s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 12:40:50 (running for 00:42:55.80)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:40:55 (running for 00:43:00.82)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 60%|█████████████████████████▊                 | 6/10 [01:02<00:41, 10.43s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 12:41:00 (running for 00:43:05.84)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:41:05 (running for 00:43:10.86)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 70%|██████████████████████████████             | 7/10 [01:13<00:31, 10.44s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 12:41:10 (running for 00:43:15.88)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:41:15 (running for 00:43:20.90)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 80%|██████████████████████████████████▍        | 8/10 [01:23<00:20, 10.44s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 12:41:20 (running for 00:43:25.92)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:41:25 (running for 00:43:30.94)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 90%|██████████████████████████████████████▋    | 9/10 [01:33<00:10, 10.44s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 12:41:31 (running for 00:43:35.96)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:41:36 (running for 00:43:40.98)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


[36m(RayTrainWorker pid=569510)[0m Rank 1 - model initialization done -> loss fn initialization
[36m(RayTrainWorker pid=569511)[0m Rank 0 - reading data
[36m(RayTrainWorker pid=569511)[0m Rank 0 - reading data done -> model initialization


100%|██████████████████████████████████████████| 10/10 [01:44<00:00, 10.44s/it]
 90%|██████████████████████████████████████▋    | 9/10 [01:34<00:10, 10.48s/it]
100%|██████████████████████████████████████████| 10/10 [01:44<00:00, 10.47s/it]


== Status ==
Current time: 2025-09-29 12:41:41 (running for 00:43:46.00)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:41:46 (running for 00:43:51.02)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


[36m(RayTrainWorker pid=569510)[0m [KSpaceLoss] torch.Size([8, 2001191, 2, 1]) torch.Size([8, 2001191, 2, 1]) None
[36m(RayTrainWorker pid=569511)[0m Rank 0 - model initialization done -> loss fn initialization
[36m(RayTrainWorker pid=569510)[0m [ImageSpaceLoss] torch.Size([1



== Status ==
Current time: 2025-09-29 12:41:51 (running for 00:43:56.05)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:41:56 (running for 00:44:01.07)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:42:01 (running for 00:44:06.10)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-5

[36m(RayTrainWorker pid=569511)[0m   return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


[36m(RayTrainWorker pid=569510)[0m Rank 1 - Index 0 - Gradient norm: 5.224
[36m(RayTrainWorker pid=569511)[0m Averaged gradient norm: 1.000
[36m(RayTrainWorker pid=569511)[0m Iteration: 7 - Elapsed time: 362 - Training loss: ['1.632'] - Validation loss: ['0.000']
== Status ==
Current time: 2025-09-29 12:45:42 (running for 00:47:46.99)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:45:47 (running for 00:47:52.02)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


[36m(RayTra

[36m(RayTrainWorker pid=569510)[0m   return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


== Status ==
Current time: 2025-09-29 12:45:52 (running for 00:47:57.04)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




  0%|                                                   | 0/10 [00:00<?, ?it/s]


== Status ==
Current time: 2025-09-29 12:45:57 (running for 00:48:02.06)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:46:02 (running for 00:48:07.08)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:46:07 (running for 00:48:12.10)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-5

 10%|████▎                                      | 1/10 [00:10<01:33, 10.42s/it]
  0%|                                                   | 0/10 [00:00<?, ?it/s]


== Status ==
Current time: 2025-09-29 12:46:12 (running for 00:48:17.12)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:46:17 (running for 00:48:22.14)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 20%|████████▌                                  | 2/10 [00:20<01:23, 10.42s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 12:46:22 (running for 00:48:27.16)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:46:27 (running for 00:48:32.18)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 30%|████████████▉                              | 3/10 [00:31<01:12, 10.42s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 12:46:32 (running for 00:48:37.20)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:46:37 (running for 00:48:42.22)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 40%|█████████████████▏                         | 4/10 [00:41<01:02, 10.42s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 12:46:42 (running for 00:48:47.24)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:46:47 (running for 00:48:52.26)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 50%|█████████████████████▌                     | 5/10 [00:52<00:52, 10.42s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 12:46:52 (running for 00:48:57.27)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:46:57 (running for 00:49:02.29)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 60%|█████████████████████████▊                 | 6/10 [01:02<00:41, 10.43s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 12:47:02 (running for 00:49:07.31)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:47:07 (running for 00:49:12.33)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 70%|██████████████████████████████             | 7/10 [01:12<00:31, 10.43s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 12:47:12 (running for 00:49:17.35)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:47:17 (running for 00:49:22.37)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 80%|██████████████████████████████████▍        | 8/10 [01:23<00:20, 10.43s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 12:47:22 (running for 00:49:27.39)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:47:27 (running for 00:49:32.41)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 90%|██████████████████████████████████████▋    | 9/10 [01:33<00:10, 10.44s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 12:47:32 (running for 00:49:37.43)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:47:37 (running for 00:49:42.45)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


[36m(RayTrainWorker pid=569510)[0m Rank 1 - model initialization done -> loss fn initialization
[36m(RayTrainWorker pid=569511)[0m Rank 0 - reading data
[36m(RayTrainWorker pid=569511)[0m Rank 0 - reading data done -> model initialization


100%|██████████████████████████████████████████| 10/10 [01:44<00:00, 10.43s/it]
 90%|██████████████████████████████████████▋    | 9/10 [01:34<00:10, 10.48s/it]
100%|██████████████████████████████████████████| 10/10 [01:44<00:00, 10.47s/it]


== Status ==
Current time: 2025-09-29 12:47:42 (running for 00:49:47.47)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:47:47 (running for 00:49:52.49)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


[36m(RayTrainWorker pid=569510)[0m [KSpaceLoss] torch.Size([8, 2001191, 2, 1]) torch.Size([8, 2001191, 2, 1]) None
[36m(RayTrainWorker pid=569511)[0m Rank 0 - model initialization done -> loss fn initialization
[36m(RayTrainWorker pid=569510)[0m [ImageSpaceLoss] torch.Size([1



== Status ==
Current time: 2025-09-29 12:47:52 (running for 00:49:57.52)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:47:57 (running for 00:50:02.54)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:48:02 (running for 00:50:07.56)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-5

[36m(RayTrainWorker pid=569510)[0m   return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


== Status ==
Current time: 2025-09-29 12:51:38 (running for 00:53:43.49)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


[36m(RayTrainWorker pid=569510)[0m Rank 1 - Index 0 - Gradient norm: 5.269
[36m(RayTrainWorker pid=569511)[0m Rank 0 - compute backward pass done -> compute accumulator
[36m(RayTrainWorker pid=569511)[0m Averaged gradient norm: 1.000
[36m(RayTrainWorker pid=569511)[0m Iteration: 8 - Elapsed time: 360 - Training loss: ['1.641'] - Validation loss: ['0.000']


[36m(RayTrainWorker pid=569511)[0m   return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


== Status ==
Current time: 2025-09-29 12:51:43 (running for 00:53:48.51)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


[36m(RayTrainWorker pid=569510)[0m Rank 1 - reading data
[36m(RayTrainWorker pid=569511)[0m Rank 0 - Index 0 - Gradient norm: 6.122
== Status ==
Current time: 2025-09-29 12:51:48 (running for 00:53:53.54)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


[36m(RayTrainWorker pid=569510)[0m Rank 1 - reading data done -> model initialization




== Status ==
Current time: 2025-09-29 12:51:53 (running for 00:53:58.56)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




  0%|                                                   | 0/10 [00:00<?, ?it/s]


== Status ==
Current time: 2025-09-29 12:51:58 (running for 00:54:03.58)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:52:03 (running for 00:54:08.60)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 10%|████▎                                      | 1/10 [00:10<01:33, 10.38s/it]
  0%|                                                   | 0/10 [00:00<?, ?it/s]


== Status ==
Current time: 2025-09-29 12:52:08 (running for 00:54:13.62)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:52:13 (running for 00:54:18.63)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 20%|████████▌                                  | 2/10 [00:20<01:23, 10.40s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 12:52:18 (running for 00:54:23.66)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:52:23 (running for 00:54:28.67)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 30%|████████████▉                              | 3/10 [00:31<01:12, 10.41s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 12:52:28 (running for 00:54:33.69)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:52:33 (running for 00:54:38.71)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 40%|█████████████████▏                         | 4/10 [00:41<01:02, 10.42s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 12:52:38 (running for 00:54:43.73)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:52:43 (running for 00:54:48.75)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:52:48 (running for 00:54:53.77)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-5

 50%|█████████████████████▌                     | 5/10 [00:52<00:52, 10.42s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 12:52:53 (running for 00:54:58.78)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:52:58 (running for 00:55:03.80)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 60%|█████████████████████████▊                 | 6/10 [01:02<00:41, 10.42s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 12:53:03 (running for 00:55:08.82)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:53:08 (running for 00:55:13.84)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 70%|██████████████████████████████             | 7/10 [01:12<00:31, 10.42s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 12:53:13 (running for 00:55:18.86)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:53:18 (running for 00:55:23.88)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 80%|██████████████████████████████████▍        | 8/10 [01:23<00:20, 10.42s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 12:53:23 (running for 00:55:28.89)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:53:28 (running for 00:55:33.91)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 90%|██████████████████████████████████████▋    | 9/10 [01:33<00:10, 10.43s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 12:53:33 (running for 00:55:38.93)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:53:38 (running for 00:55:43.95)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


[36m(RayTrainWorker pid=569510)[0m Rank 1 - model initialization done -> loss fn initialization
[36m(RayTrainWorker pid=569511)[0m Rank 0 - reading data
[36m(RayTrainWorker pid=569511)[0m Rank 0 - reading data done -> model initialization


100%|██████████████████████████████████████████| 10/10 [01:44<00:00, 10.42s/it]
 90%|██████████████████████████████████████▋    | 9/10 [01:34<00:10, 10.48s/it]
100%|██████████████████████████████████████████| 10/10 [01:44<00:00, 10.47s/it]


== Status ==
Current time: 2025-09-29 12:53:44 (running for 00:55:48.97)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


[36m(RayTrainWorker pid=569510)[0m [KSpaceLoss] torch.Size([8, 2001191, 2, 1]) torch.Size([8, 2001191, 2, 1]) None
[36m(RayTrainWorker pid=569511)[0m Rank 0 - model initialization done -> loss fn initialization
[36m(RayTrainWorker pid=569510)[0m [ImageSpaceLoss] torch.Size([156, 156, 156, 2, 1]) torch.Size([156, 156, 156, 2, 1]) None
== Status ==
Current time: 2025-09-29 12:53:49 (running for 00:55:54.00)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_e



== Status ==
Current time: 2025-09-29 12:53:54 (running for 00:55:59.02)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:53:59 (running for 00:56:04.04)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:54:04 (running for 00:56:09.06)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-5

[36m(RayTrainWorker pid=569510)[0m   return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


== Status ==
Current time: 2025-09-29 12:57:35 (running for 00:59:39.99)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


[36m(RayTrainWorker pid=569510)[0m Rank 1 - Index 0 - Gradient norm: 5.299
[36m(RayTrainWorker pid=569511)[0m Averaged gradient norm: 1.000
[36m(RayTrainWorker pid=569511)[0m Creating untagged checkpoint ...
[36m(RayTrainWorker pid=569511)[0m 2025-09-29 12:57:36 Schedule checkpoint save with tag:  ...
[36m(RayTrainWorker pid=569511)[0m 2025-09-29 12:57:36 Saved checkpoint to buffer 0.0 seconds
[36m(RayTrainWorker pid=569511)[0m Iteration: 9 - Elapsed time: 357 - Training loss: ['1.613'] - Validation loss: ['0.000']
[36m(RayTrainWorker pid=569511)[0m 2025-09-29 12:57:37 Saved buffer to filesystem in 0.1 seconds
[36m(Ray

[36m(RayTrainWorker pid=569511)[0m   return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


== Status ==
Current time: 2025-09-29 12:57:50 (running for 00:59:55.06)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




  0%|                                                   | 0/10 [00:00<?, ?it/s]


== Status ==
Current time: 2025-09-29 12:57:55 (running for 01:00:00.09)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:58:00 (running for 01:00:05.11)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 10%|████▎                                      | 1/10 [00:10<01:35, 10.58s/it]
  0%|                                                   | 0/10 [00:00<?, ?it/s]


== Status ==
Current time: 2025-09-29 12:58:05 (running for 01:00:10.13)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:58:10 (running for 01:00:15.15)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 20%|████████▌                                  | 2/10 [00:21<01:23, 10.49s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 12:58:15 (running for 01:00:20.17)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:58:20 (running for 01:00:25.19)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 30%|████████████▉                              | 3/10 [00:31<01:13, 10.46s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 12:58:25 (running for 01:00:30.21)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:58:30 (running for 01:00:35.23)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 40%|█████████████████▏                         | 4/10 [00:41<01:02, 10.44s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 12:58:35 (running for 01:00:40.24)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:58:40 (running for 01:00:45.27)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 50%|█████████████████████▌                     | 5/10 [00:52<00:52, 10.44s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 12:58:45 (running for 01:00:50.28)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:58:50 (running for 01:00:55.30)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:58:55 (running for 01:01:00.32)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-5

 60%|█████████████████████████▊                 | 6/10 [01:02<00:41, 10.43s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 12:59:00 (running for 01:01:05.35)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:59:05 (running for 01:01:10.37)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 70%|██████████████████████████████             | 7/10 [01:13<00:31, 10.44s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 12:59:10 (running for 01:01:15.39)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:59:15 (running for 01:01:20.42)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 80%|██████████████████████████████████▍        | 8/10 [01:23<00:20, 10.44s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 12:59:20 (running for 01:01:25.44)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:59:25 (running for 01:01:30.47)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 90%|██████████████████████████████████████▋    | 9/10 [01:34<00:10, 10.45s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 12:59:30 (running for 01:01:35.50)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:59:35 (running for 01:01:40.52)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


[36m(RayTrainWorker pid=569510)[0m Rank 1 - model initialization done -> loss fn initialization
[36m(RayTrainWorker pid=569511)[0m Rank 0 - reading data
[36m(RayTrainWorker pid=569511)[0m Rank 0 - reading data done -> model initialization


100%|██████████████████████████████████████████| 10/10 [01:44<00:00, 10.45s/it]
 90%|██████████████████████████████████████▋    | 9/10 [01:35<00:10, 10.51s/it]
100%|██████████████████████████████████████████| 10/10 [01:46<00:00, 10.60s/it]


== Status ==
Current time: 2025-09-29 12:59:40 (running for 01:01:45.54)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:59:45 (running for 01:01:50.56)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


[36m(RayTrainWorker pid=569511)[0m [KSpaceLoss] torch.Size([8, 2001191, 2, 1]) torch.Size([8, 2001191, 2, 1]) None
[36m(RayTrainWorker pid=569511)[0m Rank 0 - model initialization done -> loss fn initialization
[36m(RayTrainWorker pid=569511)[0m [ImageSpaceLoss] torch.Size([1



== Status ==
Current time: 2025-09-29 12:59:50 (running for 01:01:55.59)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 12:59:55 (running for 01:02:00.61)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:00:00 (running for 01:02:05.63)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-5

[36m(RayTrainWorker pid=569510)[0m   return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


[36m(RayTrainWorker pid=569510)[0m Rank 1 - Index 0 - Gradient norm: 5.246
[36m(RayTrainWorker pid=569511)[0m Averaged gradient norm: 1.000
[36m(RayTrainWorker pid=569511)[0m Iteration: 10 - Elapsed time: 355 - Training loss: ['1.662'] - Validation loss: ['0.000']
== Status ==
Current time: 2025-09-29 13:03:36 (running for 01:05:41.51)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:03:41 (running for 01:05:46.54)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


[36m(RayTr

[36m(RayTrainWorker pid=569511)[0m   return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


== Status ==
Current time: 2025-09-29 13:03:46 (running for 01:05:51.56)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:03:51 (running for 01:05:56.59)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




  0%|                                                   | 0/10 [00:00<?, ?it/s]


== Status ==
Current time: 2025-09-29 13:03:56 (running for 01:06:01.60)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:04:01 (running for 01:06:06.62)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 10%|████▎                                      | 1/10 [00:10<01:33, 10.40s/it]
  0%|                                                   | 0/10 [00:00<?, ?it/s]


== Status ==
Current time: 2025-09-29 13:04:06 (running for 01:06:11.64)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:04:11 (running for 01:06:16.66)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 20%|████████▌                                  | 2/10 [00:20<01:23, 10.41s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 13:04:16 (running for 01:06:21.68)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:04:21 (running for 01:06:26.70)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 30%|████████████▉                              | 3/10 [00:31<01:12, 10.42s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 13:04:26 (running for 01:06:31.72)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:04:31 (running for 01:06:36.74)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 40%|█████████████████▏                         | 4/10 [00:41<01:02, 10.42s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 13:04:36 (running for 01:06:41.76)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:04:41 (running for 01:06:46.78)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 50%|█████████████████████▌                     | 5/10 [00:52<00:52, 10.43s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 13:04:46 (running for 01:06:51.80)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:04:51 (running for 01:06:56.82)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 60%|█████████████████████████▊                 | 6/10 [01:02<00:41, 10.43s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 13:04:56 (running for 01:07:01.84)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:05:01 (running for 01:07:06.86)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 70%|██████████████████████████████             | 7/10 [01:12<00:31, 10.44s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 13:05:06 (running for 01:07:11.88)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:05:11 (running for 01:07:16.90)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 80%|██████████████████████████████████▍        | 8/10 [01:23<00:20, 10.43s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 13:05:16 (running for 01:07:21.92)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:05:21 (running for 01:07:26.94)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 90%|██████████████████████████████████████▋    | 9/10 [01:33<00:10, 10.43s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 13:05:26 (running for 01:07:31.96)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:05:32 (running for 01:07:36.98)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


[36m(RayTrainWorker pid=569510)[0m Rank 1 - model initialization done -> loss fn initialization
[36m(RayTrainWorker pid=569510)[0m Rank 1 - reading data
[36m(RayTrainWorker pid=569511)[0m Rank 0 - reading data done -> model initialization


100%|██████████████████████████████████████████| 10/10 [01:44<00:00, 10.43s/it]
 90%|██████████████████████████████████████▋    | 9/10 [01:34<00:10, 10.48s/it]
100%|██████████████████████████████████████████| 10/10 [01:44<00:00, 10.46s/it]


== Status ==
Current time: 2025-09-29 13:05:37 (running for 01:07:42.01)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:05:42 (running for 01:07:47.04)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


[36m(RayTrainWorker pid=569510)[0m [KSpaceLoss] torch.Size([8, 2001191, 2, 1]) torch.Size([8, 2001191, 2, 1]) None
[36m(RayTrainWorker pid=569511)[0m Rank 0 - model initialization done -> loss fn initialization
[36m(RayTrainWorker pid=569510)[0m [ImageSpaceLoss] torch.Size([1



== Status ==
Current time: 2025-09-29 13:05:52 (running for 01:07:57.09)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:05:57 (running for 01:08:02.12)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:06:02 (running for 01:08:07.14)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-5

[36m(RayTrainWorker pid=569510)[0m   return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


[36m(RayTrainWorker pid=569511)[0m Rank 0 - Index 0 - Gradient norm: 6.078
[36m(RayTrainWorker pid=569511)[0m Averaged gradient norm: 1.000
[36m(RayTrainWorker pid=569511)[0m Iteration: 11 - Elapsed time: 361 - Training loss: ['1.628'] - Validation loss: ['0.000']
== Status ==
Current time: 2025-09-29 13:09:38 (running for 01:11:42.96)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


[36m(RayTrainWorker pid=569510)[0m Rank 1 - reading data
[36m(RayTrainWorker pid=569511)[0m Rank 0 - compute backward pass done -> compute accumulator
[36m(RayTrainWorker pid=569510)[0m Rank 1 - Index 0 - Gradient norm: 5.099
== Status ==
Current time: 2025-09-29 13:09:43 (running for 01:11:47.98)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128

[36m(RayTrainWorker pid=569511)[0m   return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


== Status ==
Current time: 2025-09-29 13:09:48 (running for 01:11:53.00)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




  0%|                                                   | 0/10 [00:00<?, ?it/s]


== Status ==
Current time: 2025-09-29 13:09:53 (running for 01:11:58.03)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:09:58 (running for 01:12:03.05)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 10%|████▎                                      | 1/10 [00:10<01:33, 10.39s/it]
  0%|                                                   | 0/10 [00:00<?, ?it/s]


== Status ==
Current time: 2025-09-29 13:10:03 (running for 01:12:08.07)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:10:08 (running for 01:12:13.09)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 20%|████████▌                                  | 2/10 [00:20<01:23, 10.41s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 13:10:13 (running for 01:12:18.11)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:10:18 (running for 01:12:23.13)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 30%|████████████▉                              | 3/10 [00:31<01:12, 10.43s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 13:10:23 (running for 01:12:28.15)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:10:28 (running for 01:12:33.17)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 40%|█████████████████▏                         | 4/10 [00:41<01:02, 10.43s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 13:10:33 (running for 01:12:38.19)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:10:38 (running for 01:12:43.21)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 50%|█████████████████████▌                     | 5/10 [00:52<00:52, 10.43s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 13:10:43 (running for 01:12:48.23)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:10:48 (running for 01:12:53.24)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:10:53 (running for 01:12:58.27)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-5

 60%|█████████████████████████▊                 | 6/10 [01:02<00:41, 10.43s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 13:10:58 (running for 01:13:03.28)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:11:03 (running for 01:13:08.31)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 70%|██████████████████████████████             | 7/10 [01:12<00:31, 10.43s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 13:11:08 (running for 01:13:13.32)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:11:13 (running for 01:13:18.34)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 80%|██████████████████████████████████▍        | 8/10 [01:23<00:20, 10.43s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 13:11:18 (running for 01:13:23.36)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:11:23 (running for 01:13:28.38)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 90%|██████████████████████████████████████▋    | 9/10 [01:33<00:10, 10.43s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 13:11:28 (running for 01:13:33.40)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:11:33 (running for 01:13:38.42)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


[36m(RayTrainWorker pid=569510)[0m Rank 1 - model initialization done -> loss fn initialization
[36m(RayTrainWorker pid=569511)[0m Rank 0 - reading data
[36m(RayTrainWorker pid=569511)[0m Rank 0 - reading data done -> model initialization


100%|██████████████████████████████████████████| 10/10 [01:44<00:00, 10.43s/it]
 90%|██████████████████████████████████████▋    | 9/10 [01:34<00:10, 10.48s/it]
100%|██████████████████████████████████████████| 10/10 [01:44<00:00, 10.47s/it]


== Status ==
Current time: 2025-09-29 13:11:38 (running for 01:13:43.44)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


[36m(RayTrainWorker pid=569510)[0m [KSpaceLoss] torch.Size([8, 2001191, 2, 1]) torch.Size([8, 2001191, 2, 1]) None
[36m(RayTrainWorker pid=569511)[0m Rank 0 - model initialization done -> loss fn initialization
[36m(RayTrainWorker pid=569510)[0m [ImageSpaceLoss] torch.Size([156, 156, 156, 2, 1]) torch.Size([156, 156, 156, 2, 1]) None
== Status ==
Current time: 2025-09-29 13:11:43 (running for 01:13:48.46)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_e



== Status ==
Current time: 2025-09-29 13:11:48 (running for 01:13:53.48)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:11:53 (running for 01:13:58.50)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:11:58 (running for 01:14:03.52)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-5

[36m(RayTrainWorker pid=569510)[0m   return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


[36m(RayTrainWorker pid=569510)[0m Rank 1 - Index 0 - Gradient norm: 5.187
[36m(RayTrainWorker pid=569511)[0m Averaged gradient norm: 1.000
[36m(RayTrainWorker pid=569511)[0m Iteration: 12 - Elapsed time: 354 - Training loss: ['1.645'] - Validation loss: ['0.000']
== Status ==
Current time: 2025-09-29 13:15:29 (running for 01:17:34.36)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:15:34 (running for 01:17:39.38)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


[36m(RayTr

[36m(RayTrainWorker pid=569511)[0m   return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


== Status ==
Current time: 2025-09-29 13:15:39 (running for 01:17:44.41)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:15:44 (running for 01:17:49.43)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




  0%|                                                   | 0/10 [00:00<?, ?it/s]


== Status ==
Current time: 2025-09-29 13:15:49 (running for 01:17:54.45)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:15:54 (running for 01:17:59.47)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 10%|████▎                                      | 1/10 [00:10<01:33, 10.41s/it]
  0%|                                                   | 0/10 [00:00<?, ?it/s]


== Status ==
Current time: 2025-09-29 13:15:59 (running for 01:18:04.49)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:16:04 (running for 01:18:09.51)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 20%|████████▌                                  | 2/10 [00:20<01:23, 10.42s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 13:16:09 (running for 01:18:14.53)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:16:14 (running for 01:18:19.54)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 30%|████████████▉                              | 3/10 [00:31<01:12, 10.42s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 13:16:19 (running for 01:18:24.56)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:16:24 (running for 01:18:29.58)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 40%|█████████████████▏                         | 4/10 [00:41<01:02, 10.42s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 13:16:29 (running for 01:18:34.60)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:16:34 (running for 01:18:39.62)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 50%|█████████████████████▌                     | 5/10 [00:52<00:52, 10.42s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 13:16:39 (running for 01:18:44.63)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:16:44 (running for 01:18:49.65)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 60%|█████████████████████████▊                 | 6/10 [01:02<00:41, 10.43s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 13:16:49 (running for 01:18:54.67)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:16:54 (running for 01:18:59.69)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 70%|██████████████████████████████             | 7/10 [01:12<00:31, 10.43s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 13:16:59 (running for 01:19:04.71)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:17:04 (running for 01:19:09.73)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 80%|██████████████████████████████████▍        | 8/10 [01:23<00:20, 10.43s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 13:17:09 (running for 01:19:14.75)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:17:14 (running for 01:19:19.77)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 90%|██████████████████████████████████████▋    | 9/10 [01:33<00:10, 10.44s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 13:17:19 (running for 01:19:24.80)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:17:24 (running for 01:19:29.82)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:17:29 (running for 01:19:34.84)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-5

100%|██████████████████████████████████████████| 10/10 [01:44<00:00, 10.43s/it]
 90%|██████████████████████████████████████▋    | 9/10 [01:34<00:10, 10.48s/it]
100%|██████████████████████████████████████████| 10/10 [01:44<00:00, 10.47s/it]


== Status ==
Current time: 2025-09-29 13:17:34 (running for 01:19:39.87)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


[36m(RayTrainWorker pid=569510)[0m [KSpaceLoss] torch.Size([8, 2001191, 2, 1]) torch.Size([8, 2001191, 2, 1]) None
[36m(RayTrainWorker pid=569511)[0m Rank 0 - model initialization done -> loss fn initialization
[36m(RayTrainWorker pid=569510)[0m [ImageSpaceLoss] torch.Size([156, 156, 156, 2, 1]) torch.Size([156, 156, 156, 2, 1]) None
[36m(RayTrainWorker pid=569510)[0m Rank 1 - Loss k-space: 1.702 - Loss image space: 0.035 - Loss Wavelet 0.000 - Loss Hankel 0.000 - Loss Casorati 0.000
[36m(RayTrainWorker pid=569510)[0m Rank 1 - loss fn initialization done -> compute backward pass




== Status ==
Current time: 2025-09-29 13:17:39 (running for 01:19:44.89)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:17:44 (running for 01:19:49.91)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:17:49 (running for 01:19:54.93)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-5

[36m(RayTrainWorker pid=569510)[0m   return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


[36m(RayTrainWorker pid=569510)[0m Rank 1 - Index 0 - Gradient norm: 5.050
[36m(RayTrainWorker pid=569511)[0m Averaged gradient norm: 1.000
[36m(RayTrainWorker pid=569511)[0m Iteration: 13 - Elapsed time: 354 - Training loss: ['1.664'] - Validation loss: ['0.000']
== Status ==
Current time: 2025-09-29 13:21:26 (running for 01:23:31.05)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:21:31 (running for 01:23:36.07)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


[36m(RayTr

[36m(RayTrainWorker pid=569511)[0m   return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


== Status ==
Current time: 2025-09-29 13:21:36 (running for 01:23:41.09)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




  0%|                                                   | 0/10 [00:00<?, ?it/s]


== Status ==
Current time: 2025-09-29 13:21:41 (running for 01:23:46.11)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:21:46 (running for 01:23:51.13)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 10%|████▎                                      | 1/10 [00:10<01:33, 10.39s/it]
  0%|                                                   | 0/10 [00:00<?, ?it/s]


== Status ==
Current time: 2025-09-29 13:21:51 (running for 01:23:56.15)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:21:56 (running for 01:24:01.17)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 20%|████████▌                                  | 2/10 [00:20<01:23, 10.42s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 13:22:01 (running for 01:24:06.19)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:22:06 (running for 01:24:11.21)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 30%|████████████▉                              | 3/10 [00:31<01:12, 10.42s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 13:22:11 (running for 01:24:16.23)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:22:16 (running for 01:24:21.25)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 40%|█████████████████▏                         | 4/10 [00:41<01:02, 10.43s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 13:22:21 (running for 01:24:26.27)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:22:26 (running for 01:24:31.29)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:22:31 (running for 01:24:36.31)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-5

 50%|█████████████████████▌                     | 5/10 [00:52<00:52, 10.43s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 13:22:36 (running for 01:24:41.33)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:22:41 (running for 01:24:46.35)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 60%|█████████████████████████▊                 | 6/10 [01:02<00:41, 10.43s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 13:22:46 (running for 01:24:51.37)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:22:51 (running for 01:24:56.40)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 70%|██████████████████████████████             | 7/10 [01:12<00:31, 10.44s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 13:22:56 (running for 01:25:01.42)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:23:01 (running for 01:25:06.44)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 80%|██████████████████████████████████▍        | 8/10 [01:23<00:20, 10.44s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 13:23:06 (running for 01:25:11.46)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:23:11 (running for 01:25:16.48)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 90%|██████████████████████████████████████▋    | 9/10 [01:33<00:10, 10.44s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 13:23:16 (running for 01:25:21.50)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:23:21 (running for 01:25:26.53)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


[36m(RayTrainWorker pid=569510)[0m Rank 1 - model initialization done -> loss fn initialization
[36m(RayTrainWorker pid=569511)[0m Rank 0 - reading data
[36m(RayTrainWorker pid=569511)[0m Rank 0 - reading data done -> model initialization


100%|██████████████████████████████████████████| 10/10 [01:44<00:00, 10.43s/it]
 90%|██████████████████████████████████████▋    | 9/10 [01:34<00:10, 10.48s/it]
100%|██████████████████████████████████████████| 10/10 [01:44<00:00, 10.47s/it]


== Status ==
Current time: 2025-09-29 13:23:26 (running for 01:25:31.55)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


[36m(RayTrainWorker pid=569511)[0m [KSpaceLoss] torch.Size([8, 2001191, 2, 1]) torch.Size([8, 2001191, 2, 1]) None
[36m(RayTrainWorker pid=569511)[0m Rank 0 - model initialization done -> loss fn initialization
[36m(RayTrainWorker pid=569511)[0m [ImageSpaceLoss] torch.Size([156, 156, 156, 2, 1]) torch.Size([156, 156, 156, 2, 1]) None
== Status ==
Current time: 2025-09-29 13:23:31 (running for 01:25:36.57)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_e



== Status ==
Current time: 2025-09-29 13:23:36 (running for 01:25:41.60)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:23:41 (running for 01:25:46.62)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:23:46 (running for 01:25:51.64)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-5

[36m(RayTrainWorker pid=569510)[0m   return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


[36m(RayTrainWorker pid=569510)[0m Rank 1 - Index 0 - Gradient norm: 5.082
[36m(RayTrainWorker pid=569511)[0m Averaged gradient norm: 1.000
[36m(RayTrainWorker pid=569511)[0m Creating untagged checkpoint ...
[36m(RayTrainWorker pid=569511)[0m 2025-09-29 13:27:16 Schedule checkpoint save with tag:  ...
[36m(RayTrainWorker pid=569511)[0m 2025-09-29 13:27:16 Saved checkpoint to buffer 0.0 seconds
[36m(RayTrainWorker pid=569511)[0m Iteration: 14 - Elapsed time: 353 - Training loss: ['1.617'] - Validation loss: ['0.000']
[36m(RayTrainWorker pid=569511)[0m 2025-09-29 13:27:16 Saved buffer to filesystem in 0.1 seconds
[36m(RayTrainWorker pid=569511)[0m 2025-09-29 13:27:16 Completed saving checkpoint.
== Status ==
Current time: 2025-09-29 13:27:17 (running for 01:29:22.52)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-

[36m(RayTrainWorker pid=569511)[0m   return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


== Status ==
Current time: 2025-09-29 13:27:27 (running for 01:29:32.56)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




  0%|                                                   | 0/10 [00:00<?, ?it/s]


== Status ==
Current time: 2025-09-29 13:27:32 (running for 01:29:37.58)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:27:37 (running for 01:29:42.61)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:27:42 (running for 01:29:47.62)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-5

 10%|████▎                                      | 1/10 [00:10<01:33, 10.40s/it]
  0%|                                                   | 0/10 [00:00<?, ?it/s]


== Status ==
Current time: 2025-09-29 13:27:47 (running for 01:29:52.65)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:27:52 (running for 01:29:57.69)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 20%|████████▌                                  | 2/10 [00:20<01:24, 10.50s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 13:27:57 (running for 01:30:02.72)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:28:02 (running for 01:30:07.73)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 30%|████████████▉                              | 3/10 [00:31<01:13, 10.50s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 13:28:07 (running for 01:30:12.75)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:28:12 (running for 01:30:17.77)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 40%|█████████████████▏                         | 4/10 [00:41<01:02, 10.49s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 13:28:17 (running for 01:30:22.79)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:28:22 (running for 01:30:27.81)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 50%|█████████████████████▌                     | 5/10 [00:52<00:52, 10.48s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 13:28:27 (running for 01:30:32.83)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:28:32 (running for 01:30:37.85)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 60%|█████████████████████████▊                 | 6/10 [01:02<00:41, 10.48s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 13:28:37 (running for 01:30:42.87)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:28:42 (running for 01:30:47.89)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 70%|██████████████████████████████             | 7/10 [01:13<00:31, 10.48s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 13:28:47 (running for 01:30:52.91)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:28:52 (running for 01:30:57.93)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 80%|██████████████████████████████████▍        | 8/10 [01:23<00:20, 10.48s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 13:28:57 (running for 01:31:02.95)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:29:03 (running for 01:31:07.96)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 90%|██████████████████████████████████████▋    | 9/10 [01:34<00:10, 10.48s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 13:29:08 (running for 01:31:12.98)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:29:13 (running for 01:31:18.00)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


[36m(RayTrainWorker pid=569511)[0m Rank 0 - model initialization done -> loss fn initialization
[36m(RayTrainWorker pid=569511)[0m Rank 0 - reading data
[36m(RayTrainWorker pid=569511)[0m Rank 0 - reading data done -> model initialization


100%|██████████████████████████████████████████| 10/10 [01:44<00:00, 10.48s/it]
 90%|██████████████████████████████████████▋    | 9/10 [01:34<00:10, 10.44s/it]


== Status ==
Current time: 2025-09-29 13:29:18 (running for 01:31:23.02)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




100%|██████████████████████████████████████████| 10/10 [01:44<00:00, 10.45s/it]


== Status ==
Current time: 2025-09-29 13:29:23 (running for 01:31:28.04)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


[36m(RayTrainWorker pid=569511)[0m [KSpaceLoss] torch.Size([8, 2001191, 2, 1]) torch.Size([8, 2001191, 2, 1]) None
[36m(RayTrainWorker pid=569510)[0m Rank 1 - model initialization done -> loss fn initialization
[36m(RayTrainWorker pid=569511)[0m [ImageSpaceLoss] torch.Size([156, 156, 156, 2, 1]) torch.Size([156, 156, 156, 2, 1]) None
== Status ==
Current time: 2025-09-29 13:29:28 (running for 01:31:33.06)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_e



== Status ==
Current time: 2025-09-29 13:29:33 (running for 01:31:38.09)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:29:38 (running for 01:31:43.11)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:29:43 (running for 01:31:48.13)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-5

[36m(RayTrainWorker pid=569510)[0m   return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


[36m(RayTrainWorker pid=569510)[0m Rank 1 - Index 0 - Gradient norm: 5.183
[36m(RayTrainWorker pid=569511)[0m Averaged gradient norm: 1.000
[36m(RayTrainWorker pid=569511)[0m Iteration: 15 - Elapsed time: 357 - Training loss: ['1.647'] - Validation loss: ['0.000']
== Status ==
Current time: 2025-09-29 13:33:13 (running for 01:35:18.94)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:33:19 (running for 01:35:23.96)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


[36m(RayTr

[36m(RayTrainWorker pid=569511)[0m   return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


== Status ==
Current time: 2025-09-29 13:33:24 (running for 01:35:28.98)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:33:29 (running for 01:35:34.01)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




  0%|                                                   | 0/10 [00:00<?, ?it/s]


== Status ==
Current time: 2025-09-29 13:33:34 (running for 01:35:39.03)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:33:39 (running for 01:35:44.05)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 10%|████▎                                      | 1/10 [00:10<01:33, 10.39s/it]
  0%|                                                   | 0/10 [00:00<?, ?it/s]


== Status ==
Current time: 2025-09-29 13:33:44 (running for 01:35:49.06)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:33:49 (running for 01:35:54.09)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 20%|████████▌                                  | 2/10 [00:20<01:23, 10.41s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 13:33:54 (running for 01:35:59.10)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:33:59 (running for 01:36:04.13)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 30%|████████████▉                              | 3/10 [00:31<01:12, 10.42s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 13:34:04 (running for 01:36:09.14)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:34:09 (running for 01:36:14.16)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 40%|█████████████████▏                         | 4/10 [00:41<01:02, 10.43s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 13:34:14 (running for 01:36:19.18)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:34:19 (running for 01:36:24.20)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 50%|█████████████████████▌                     | 5/10 [00:52<00:52, 10.43s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 13:34:24 (running for 01:36:29.22)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:34:29 (running for 01:36:34.24)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 60%|█████████████████████████▊                 | 6/10 [01:02<00:41, 10.43s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 13:34:34 (running for 01:36:39.26)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:34:39 (running for 01:36:44.28)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 70%|██████████████████████████████             | 7/10 [01:12<00:31, 10.43s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 13:34:44 (running for 01:36:49.30)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:34:49 (running for 01:36:54.32)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 80%|██████████████████████████████████▍        | 8/10 [01:23<00:20, 10.43s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 13:34:54 (running for 01:36:59.34)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:34:59 (running for 01:37:04.36)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 90%|██████████████████████████████████████▋    | 9/10 [01:33<00:10, 10.43s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 13:35:04 (running for 01:37:09.37)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:35:09 (running for 01:37:14.39)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


[36m(RayTrainWorker pid=569510)[0m Rank 1 - model initialization done -> loss fn initialization
[36m(RayTrainWorker pid=569511)[0m Rank 0 - reading data
[36m(RayTrainWorker pid=569511)[0m Rank 0 - reading data done -> model initialization
== Status ==
Current time: 2025-09-29

100%|██████████████████████████████████████████| 10/10 [01:44<00:00, 10.43s/it]
 90%|██████████████████████████████████████▋    | 9/10 [01:34<00:10, 10.48s/it]
100%|██████████████████████████████████████████| 10/10 [01:44<00:00, 10.47s/it]


== Status ==
Current time: 2025-09-29 13:35:19 (running for 01:37:24.44)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


[36m(RayTrainWorker pid=569510)[0m [KSpaceLoss] torch.Size([8, 2001191, 2, 1]) torch.Size([8, 2001191, 2, 1]) None
[36m(RayTrainWorker pid=569511)[0m Rank 0 - model initialization done -> loss fn initialization
[36m(RayTrainWorker pid=569510)[0m [ImageSpaceLoss] torch.Size([156, 156, 156, 2, 1]) torch.Size([156, 156, 156, 2, 1]) None
[36m(RayTrainWorker pid=569510)[0m Rank 1 - Loss k-space: 1.622 - Loss image space: 0.035 - Loss Wavelet 0.000 - Loss Hankel 0.000 - Loss Casorati 0.000
[36m(RayTrainWorker pid=569510)[0m Rank 1 - loss fn initialization done -> compute backward pass




== Status ==
Current time: 2025-09-29 13:35:24 (running for 01:37:29.46)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:35:29 (running for 01:37:34.48)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:35:34 (running for 01:37:39.50)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-5

[36m(RayTrainWorker pid=569510)[0m   return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


[36m(RayTrainWorker pid=569510)[0m Rank 1 - Index 0 - Gradient norm: 5.088
[36m(RayTrainWorker pid=569511)[0m Averaged gradient norm: 1.000
[36m(RayTrainWorker pid=569511)[0m Iteration: 16 - Elapsed time: 353 - Training loss: ['1.604'] - Validation loss: ['0.000']
== Status ==
Current time: 2025-09-29 13:39:10 (running for 01:41:15.56)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


[36m(RayTrainWorker pid=569510)[0m Rank 1 - reading data
[36m(RayTrainWorker pid=569511)[0m Rank 0 - compute backward pass done -> compute accumulator
[36m(RayTrainWorker pid=569511)[0m Rank 0 - Index 0 - Gradient norm: 5.718
== Status ==
Current time: 2025-09-29 13:39:15 (running for 01:41:20.58)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128

[36m(RayTrainWorker pid=569511)[0m   return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


== Status ==
Current time: 2025-09-29 13:39:20 (running for 01:41:25.61)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




  0%|                                                   | 0/10 [00:00<?, ?it/s]


== Status ==
Current time: 2025-09-29 13:39:25 (running for 01:41:30.63)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:39:30 (running for 01:41:35.65)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 10%|████▎                                      | 1/10 [00:10<01:33, 10.42s/it]
  0%|                                                   | 0/10 [00:00<?, ?it/s]


== Status ==
Current time: 2025-09-29 13:39:35 (running for 01:41:40.67)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:39:40 (running for 01:41:45.69)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 20%|████████▌                                  | 2/10 [00:20<01:23, 10.42s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 13:39:45 (running for 01:41:50.71)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:39:50 (running for 01:41:55.73)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 30%|████████████▉                              | 3/10 [00:31<01:12, 10.42s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 13:39:55 (running for 01:42:00.75)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:40:00 (running for 01:42:05.77)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 40%|█████████████████▏                         | 4/10 [00:41<01:02, 10.43s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 13:40:05 (running for 01:42:10.78)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:40:10 (running for 01:42:15.81)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 50%|█████████████████████▌                     | 5/10 [00:52<00:52, 10.43s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 13:40:15 (running for 01:42:20.82)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:40:20 (running for 01:42:25.84)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 60%|█████████████████████████▊                 | 6/10 [01:02<00:41, 10.43s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 13:40:25 (running for 01:42:30.86)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:40:30 (running for 01:42:35.88)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:40:35 (running for 01:42:40.90)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-5

 70%|██████████████████████████████             | 7/10 [01:12<00:31, 10.42s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 13:40:40 (running for 01:42:45.92)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:40:45 (running for 01:42:50.94)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 80%|██████████████████████████████████▍        | 8/10 [01:23<00:20, 10.43s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 13:40:50 (running for 01:42:55.96)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:40:56 (running for 01:43:00.97)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 90%|██████████████████████████████████████▋    | 9/10 [01:33<00:10, 10.44s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 13:41:01 (running for 01:43:05.99)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:41:06 (running for 01:43:11.01)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


[36m(RayTrainWorker pid=569510)[0m Rank 1 - model initialization done -> loss fn initialization
[36m(RayTrainWorker pid=569511)[0m Rank 0 - reading data
[36m(RayTrainWorker pid=569511)[0m Rank 0 - reading data done -> model initialization


100%|██████████████████████████████████████████| 10/10 [01:44<00:00, 10.43s/it]
 90%|██████████████████████████████████████▋    | 9/10 [01:34<00:10, 10.48s/it]
100%|██████████████████████████████████████████| 10/10 [01:44<00:00, 10.46s/it]


== Status ==
Current time: 2025-09-29 13:41:11 (running for 01:43:16.03)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


[36m(RayTrainWorker pid=569511)[0m [KSpaceLoss] torch.Size([8, 2001191, 2, 1]) torch.Size([8, 2001191, 2, 1]) None
[36m(RayTrainWorker pid=569511)[0m Rank 0 - model initialization done -> loss fn initialization
[36m(RayTrainWorker pid=569511)[0m [ImageSpaceLoss] torch.Size([156, 156, 156, 2, 1]) torch.Size([156, 156, 156, 2, 1]) None
[36m(RayTrainWorker pid=569510)[0m Rank 1 - Loss k-space: 1.595 - Loss image space: 0.036 - Loss Wavelet 0.000 - Loss Hankel 0.000 - Loss Casorati 0.000
[36m(RayTrainWorker pid=569510)[0m Rank 1 - loss fn initialization done -> compute backward pass




== Status ==
Current time: 2025-09-29 13:41:16 (running for 01:43:21.05)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:41:21 (running for 01:43:26.07)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:41:26 (running for 01:43:31.10)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-5

[36m(RayTrainWorker pid=569510)[0m   return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


[36m(RayTrainWorker pid=569510)[0m Rank 1 - Index 0 - Gradient norm: 5.134
[36m(RayTrainWorker pid=569511)[0m Averaged gradient norm: 1.000
[36m(RayTrainWorker pid=569511)[0m Iteration: 17 - Elapsed time: 353 - Training loss: ['1.614'] - Validation loss: ['0.000']
== Status ==
Current time: 2025-09-29 13:45:01 (running for 01:47:06.94)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:45:07 (running for 01:47:11.96)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


[36m(RayTr

[36m(RayTrainWorker pid=569511)[0m   return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


== Status ==
Current time: 2025-09-29 13:45:12 (running for 01:47:16.99)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




  0%|                                                   | 0/10 [00:00<?, ?it/s]


== Status ==
Current time: 2025-09-29 13:45:17 (running for 01:47:22.01)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:45:22 (running for 01:47:27.03)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 10%|████▎                                      | 1/10 [00:10<01:33, 10.42s/it]
  0%|                                                   | 0/10 [00:00<?, ?it/s]


== Status ==
Current time: 2025-09-29 13:45:27 (running for 01:47:32.05)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:45:32 (running for 01:47:37.07)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 20%|████████▌                                  | 2/10 [00:20<01:23, 10.41s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 13:45:37 (running for 01:47:42.09)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:45:42 (running for 01:47:47.11)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 30%|████████████▉                              | 3/10 [00:31<01:12, 10.43s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 13:45:47 (running for 01:47:52.13)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:45:52 (running for 01:47:57.15)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:45:57 (running for 01:48:02.17)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-5

 40%|█████████████████▏                         | 4/10 [00:41<01:02, 10.45s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 13:46:02 (running for 01:48:07.19)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:46:07 (running for 01:48:12.21)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 50%|█████████████████████▌                     | 5/10 [00:52<00:52, 10.47s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 13:46:12 (running for 01:48:17.23)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:46:17 (running for 01:48:22.24)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 60%|█████████████████████████▊                 | 6/10 [01:02<00:41, 10.47s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 13:46:22 (running for 01:48:27.27)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:46:27 (running for 01:48:32.28)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 70%|██████████████████████████████             | 7/10 [01:13<00:31, 10.48s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 13:46:32 (running for 01:48:37.31)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:46:37 (running for 01:48:42.32)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 80%|██████████████████████████████████▍        | 8/10 [01:23<00:20, 10.48s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 13:46:42 (running for 01:48:47.34)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:46:47 (running for 01:48:52.36)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 90%|██████████████████████████████████████▋    | 9/10 [01:34<00:10, 10.49s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 13:46:52 (running for 01:48:57.38)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:46:57 (running for 01:49:02.40)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


[36m(RayTrainWorker pid=569511)[0m Rank 0 - model initialization done -> loss fn initialization
[36m(RayTrainWorker pid=569511)[0m Rank 0 - reading data
[36m(RayTrainWorker pid=569511)[0m Rank 0 - reading data done -> model initialization


100%|██████████████████████████████████████████| 10/10 [01:44<00:00, 10.47s/it]
 90%|██████████████████████████████████████▋    | 9/10 [01:35<00:10, 10.46s/it]
100%|██████████████████████████████████████████| 10/10 [01:45<00:00, 10.56s/it]


== Status ==
Current time: 2025-09-29 13:47:02 (running for 01:49:07.42)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:47:07 (running for 01:49:12.44)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


[36m(RayTrainWorker pid=569511)[0m [KSpaceLoss] torch.Size([8, 2001191, 2, 1]) torch.Size([8, 2001191, 2, 1]) None
[36m(RayTrainWorker pid=569510)[0m Rank 1 - model initialization done -> loss fn initialization
[36m(RayTrainWorker pid=569511)[0m [ImageSpaceLoss] torch.Size([1



== Status ==
Current time: 2025-09-29 13:47:12 (running for 01:49:17.46)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:47:17 (running for 01:49:22.48)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:47:22 (running for 01:49:27.51)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-5

[36m(RayTrainWorker pid=569510)[0m   return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


[36m(RayTrainWorker pid=569510)[0m Rank 1 - Index 0 - Gradient norm: 5.005
[36m(RayTrainWorker pid=569511)[0m Averaged gradient norm: 1.000
[36m(RayTrainWorker pid=569511)[0m Iteration: 18 - Elapsed time: 353 - Training loss: ['1.640'] - Validation loss: ['0.000']
== Status ==
Current time: 2025-09-29 13:50:53 (running for 01:52:58.31)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:50:58 (running for 01:53:03.33)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


[36m(RayTr

[36m(RayTrainWorker pid=569511)[0m   return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


== Status ==
Current time: 2025-09-29 13:51:03 (running for 01:53:08.36)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:51:08 (running for 01:53:13.38)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




  0%|                                                   | 0/10 [00:00<?, ?it/s]


== Status ==
Current time: 2025-09-29 13:51:13 (running for 01:53:18.40)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:51:18 (running for 01:53:23.42)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 10%|████▎                                      | 1/10 [00:10<01:33, 10.41s/it]
  0%|                                                   | 0/10 [00:00<?, ?it/s]


== Status ==
Current time: 2025-09-29 13:51:23 (running for 01:53:28.44)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:51:28 (running for 01:53:33.46)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 20%|████████▌                                  | 2/10 [00:20<01:23, 10.41s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 13:51:33 (running for 01:53:38.48)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:51:38 (running for 01:53:43.50)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 30%|████████████▉                              | 3/10 [00:31<01:12, 10.42s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 13:51:43 (running for 01:53:48.52)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:51:48 (running for 01:53:53.54)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 40%|█████████████████▏                         | 4/10 [00:41<01:02, 10.42s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 13:51:53 (running for 01:53:58.56)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:51:58 (running for 01:54:03.58)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 50%|█████████████████████▌                     | 5/10 [00:52<00:52, 10.43s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 13:52:03 (running for 01:54:08.60)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:52:08 (running for 01:54:13.62)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 60%|█████████████████████████▊                 | 6/10 [01:02<00:41, 10.43s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 13:52:13 (running for 01:54:18.64)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:52:18 (running for 01:54:23.66)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 70%|██████████████████████████████             | 7/10 [01:13<00:31, 10.44s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 13:52:23 (running for 01:54:28.68)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:52:28 (running for 01:54:33.69)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 80%|██████████████████████████████████▍        | 8/10 [01:23<00:20, 10.44s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 13:52:33 (running for 01:54:38.71)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:52:38 (running for 01:54:43.73)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 90%|██████████████████████████████████████▋    | 9/10 [01:33<00:10, 10.44s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 13:52:43 (running for 01:54:48.75)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:52:48 (running for 01:54:53.77)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


[36m(RayTrainWorker pid=569510)[0m Rank 1 - model initialization done -> loss fn initialization
[36m(RayTrainWorker pid=569511)[0m Rank 0 - reading data
[36m(RayTrainWorker pid=569511)[0m Rank 0 - reading data done -> model initialization
== Status ==
Current time: 2025-09-29

100%|██████████████████████████████████████████| 10/10 [01:44<00:00, 10.44s/it]
 90%|██████████████████████████████████████▋    | 9/10 [01:34<00:10, 10.49s/it]
100%|██████████████████████████████████████████| 10/10 [01:44<00:00, 10.47s/it]


== Status ==
Current time: 2025-09-29 13:52:58 (running for 01:55:03.81)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


[36m(RayTrainWorker pid=569510)[0m [KSpaceLoss] torch.Size([8, 2001191, 2, 1]) torch.Size([8, 2001191, 2, 1]) None
[36m(RayTrainWorker pid=569511)[0m Rank 0 - model initialization done -> loss fn initialization
[36m(RayTrainWorker pid=569510)[0m [ImageSpaceLoss] torch.Size([156, 156, 156, 2, 1]) torch.Size([156, 156, 156, 2, 1]) None
[36m(RayTrainWorker pid=569510)[0m Rank 1 - Loss k-space: 1.576 - Loss image space: 0.036 - Loss Wavelet 0.000 - Loss Hankel 0.000 - Loss Casorati 0.000
[36m(RayTrainWorker pid=569510)[0m Rank 1 - loss fn initialization done -> compute backward pass




== Status ==
Current time: 2025-09-29 13:53:03 (running for 01:55:08.83)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:53:08 (running for 01:55:13.85)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:53:13 (running for 01:55:18.88)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-5

[36m(RayTrainWorker pid=569511)[0m   return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


[36m(RayTrainWorker pid=569510)[0m Rank 1 - Index 0 - Gradient norm: 5.037
[36m(RayTrainWorker pid=569511)[0m Averaged gradient norm: 1.000
[36m(RayTrainWorker pid=569511)[0m Creating untagged checkpoint ...
[36m(RayTrainWorker pid=569511)[0m 2025-09-29 13:56:48 Schedule checkpoint save with tag:  ...
[36m(RayTrainWorker pid=569511)[0m 2025-09-29 13:56:48 Saved checkpoint to buffer 0.0 seconds
[36m(RayTrainWorker pid=569511)[0m Iteration: 19 - Elapsed time: 355 - Training loss: ['1.606'] - Validation loss: ['0.000']
[36m(RayTrainWorker pid=569511)[0m 2025-09-29 13:56:48 Saved buffer to filesystem in 0.1 seconds
[36m(RayTrainWorker pid=569511)[0m 2025-09-29 13:56:48 Completed saving checkpoint.
== Status ==
Current time: 2025-09-29 13:56:49 (running for 01:58:54.71)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-

[36m(RayTrainWorker pid=569510)[0m   return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


== Status ==
Current time: 2025-09-29 13:56:59 (running for 01:59:04.75)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:57:04 (running for 01:59:09.78)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




  0%|                                                   | 0/10 [00:00<?, ?it/s]


== Status ==
Current time: 2025-09-29 13:57:09 (running for 01:59:14.80)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:57:14 (running for 01:59:19.82)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 10%|████▎                                      | 1/10 [00:10<01:33, 10.38s/it]
  0%|                                                   | 0/10 [00:00<?, ?it/s]


== Status ==
Current time: 2025-09-29 13:57:19 (running for 01:59:24.83)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:57:24 (running for 01:59:29.85)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 20%|████████▌                                  | 2/10 [00:20<01:23, 10.41s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 13:57:29 (running for 01:59:34.87)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:57:34 (running for 01:59:39.89)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 30%|████████████▉                              | 3/10 [00:31<01:12, 10.42s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 13:57:39 (running for 01:59:44.91)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:57:44 (running for 01:59:49.93)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 40%|█████████████████▏                         | 4/10 [00:41<01:02, 10.42s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 13:57:49 (running for 01:59:54.95)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:57:55 (running for 01:59:59.97)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 50%|█████████████████████▌                     | 5/10 [00:52<00:52, 10.43s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 13:58:00 (running for 02:00:04.99)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:58:05 (running for 02:00:10.01)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 60%|█████████████████████████▊                 | 6/10 [01:02<00:41, 10.43s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 13:58:10 (running for 02:00:15.02)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:58:15 (running for 02:00:20.04)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 70%|██████████████████████████████             | 7/10 [01:12<00:31, 10.43s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 13:58:20 (running for 02:00:25.06)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:58:25 (running for 02:00:30.08)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 80%|██████████████████████████████████▍        | 8/10 [01:23<00:20, 10.43s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 13:58:30 (running for 02:00:35.10)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:58:35 (running for 02:00:40.12)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 90%|██████████████████████████████████████▋    | 9/10 [01:33<00:10, 10.43s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 13:58:40 (running for 02:00:45.14)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:58:45 (running for 02:00:50.16)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:58:50 (running for 02:00:55.18)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-5

100%|██████████████████████████████████████████| 10/10 [01:44<00:00, 10.43s/it]
 90%|██████████████████████████████████████▋    | 9/10 [01:34<00:10, 10.48s/it]
100%|██████████████████████████████████████████| 10/10 [01:44<00:00, 10.46s/it]


== Status ==
Current time: 2025-09-29 13:58:55 (running for 02:01:00.20)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


[36m(RayTrainWorker pid=569511)[0m [KSpaceLoss] torch.Size([8, 2001191, 2, 1]) torch.Size([8, 2001191, 2, 1]) None
[36m(RayTrainWorker pid=569511)[0m Rank 0 - model initialization done -> loss fn initialization
[36m(RayTrainWorker pid=569511)[0m [ImageSpaceLoss] torch.Size([156, 156, 156, 2, 1]) torch.Size([156, 156, 156, 2, 1]) None
[36m(RayTrainWorker pid=569510)[0m Rank 1 - Loss k-space: 1.655 - Loss image space: 0.036 - Loss Wavelet 0.000 - Loss Hankel 0.000 - Loss Casorati 0.000
[36m(RayTrainWorker pid=569510)[0m Rank 1 - loss fn initialization done -> compute backward pass




== Status ==
Current time: 2025-09-29 13:59:00 (running for 02:01:05.23)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:59:05 (running for 02:01:10.38)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 13:59:10 (running for 02:01:15.40)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-5

[36m(RayTrainWorker pid=569510)[0m   return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


[36m(RayTrainWorker pid=569510)[0m Rank 1 - Index 0 - Gradient norm: 5.017
[36m(RayTrainWorker pid=569511)[0m Averaged gradient norm: 1.000
[36m(RayTrainWorker pid=569511)[0m Iteration: 20 - Elapsed time: 354 - Training loss: ['1.643'] - Validation loss: ['0.000']
== Status ==
Current time: 2025-09-29 14:02:46 (running for 02:04:51.29)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


[36m(RayTrainWorker pid=569510)[0m Rank 1 - reading data
[36m(RayTrainWorker pid=569511)[0m Rank 0 - compute backward pass done -> compute accumulator
[36m(RayTrainWorker pid=569511)[0m Rank 0 - Index 0 - Gradient norm: 5.189
== Status ==
Current time: 2025-09-29 14:02:51 (running for 02:04:56.31)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128

[36m(RayTrainWorker pid=569511)[0m   return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


== Status ==
Current time: 2025-09-29 14:02:56 (running for 02:05:01.34)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




  0%|                                                   | 0/10 [00:00<?, ?it/s]


== Status ==
Current time: 2025-09-29 14:03:01 (running for 02:05:06.36)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:03:06 (running for 02:05:11.38)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 10%|████▎                                      | 1/10 [00:10<01:33, 10.42s/it]
  0%|                                                   | 0/10 [00:00<?, ?it/s]


== Status ==
Current time: 2025-09-29 14:03:11 (running for 02:05:16.40)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:03:16 (running for 02:05:21.42)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 20%|████████▌                                  | 2/10 [00:20<01:23, 10.40s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 14:03:21 (running for 02:05:26.44)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:03:26 (running for 02:05:31.47)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 30%|████████████▉                              | 3/10 [00:31<01:12, 10.41s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 14:03:31 (running for 02:05:36.49)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:03:36 (running for 02:05:41.51)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 40%|█████████████████▏                         | 4/10 [00:41<01:02, 10.43s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 14:03:41 (running for 02:05:46.53)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:03:46 (running for 02:05:51.55)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:03:51 (running for 02:05:56.57)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-5

 50%|█████████████████████▌                     | 5/10 [00:52<00:52, 10.43s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 14:03:56 (running for 02:06:01.59)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:04:01 (running for 02:06:06.61)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 60%|█████████████████████████▊                 | 6/10 [01:02<00:41, 10.42s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 14:04:06 (running for 02:06:11.63)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:04:11 (running for 02:06:16.65)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 70%|██████████████████████████████             | 7/10 [01:12<00:31, 10.43s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 14:04:16 (running for 02:06:21.67)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:04:21 (running for 02:06:26.69)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 80%|██████████████████████████████████▍        | 8/10 [01:23<00:20, 10.43s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 14:04:26 (running for 02:06:31.71)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:04:31 (running for 02:06:36.72)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 90%|██████████████████████████████████████▋    | 9/10 [01:33<00:10, 10.43s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 14:04:36 (running for 02:06:41.75)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:04:41 (running for 02:06:46.76)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


[36m(RayTrainWorker pid=569510)[0m Rank 1 - model initialization done -> loss fn initialization
[36m(RayTrainWorker pid=569511)[0m Rank 0 - reading data
[36m(RayTrainWorker pid=569511)[0m Rank 0 - reading data done -> model initialization


100%|██████████████████████████████████████████| 10/10 [01:44<00:00, 10.43s/it]
 90%|██████████████████████████████████████▋    | 9/10 [01:34<00:10, 10.48s/it]
100%|██████████████████████████████████████████| 10/10 [01:44<00:00, 10.46s/it]


== Status ==
Current time: 2025-09-29 14:04:46 (running for 02:06:51.78)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


[36m(RayTrainWorker pid=569511)[0m [KSpaceLoss] torch.Size([8, 2001191, 2, 1]) torch.Size([8, 2001191, 2, 1]) None
[36m(RayTrainWorker pid=569511)[0m Rank 0 - model initialization done -> loss fn initialization
[36m(RayTrainWorker pid=569511)[0m [ImageSpaceLoss] torch.Size([156, 156, 156, 2, 1]) torch.Size([156, 156, 156, 2, 1]) None
== Status ==
Current time: 2025-09-29 14:04:51 (running for 02:06:56.80)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_e



== Status ==
Current time: 2025-09-29 14:04:56 (running for 02:07:01.83)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:05:01 (running for 02:07:06.85)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:05:06 (running for 02:07:11.87)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-5

[36m(RayTrainWorker pid=569510)[0m   return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


[36m(RayTrainWorker pid=569510)[0m Rank 1 - Index 0 - Gradient norm: 5.045
[36m(RayTrainWorker pid=569511)[0m Averaged gradient norm: 1.000
[36m(RayTrainWorker pid=569511)[0m Iteration: 21 - Elapsed time: 354 - Training loss: ['1.640'] - Validation loss: ['0.000']
== Status ==
Current time: 2025-09-29 14:08:37 (running for 02:10:42.70)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:08:42 (running for 02:10:47.72)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


[36m(RayTr

[36m(RayTrainWorker pid=569511)[0m   return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


== Status ==
Current time: 2025-09-29 14:08:47 (running for 02:10:52.75)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:08:52 (running for 02:10:57.77)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




  0%|                                                   | 0/10 [00:00<?, ?it/s]


== Status ==
Current time: 2025-09-29 14:08:57 (running for 02:11:02.79)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:09:02 (running for 02:11:07.81)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 10%|████▎                                      | 1/10 [00:10<01:33, 10.41s/it]
  0%|                                                   | 0/10 [00:00<?, ?it/s]


== Status ==
Current time: 2025-09-29 14:09:07 (running for 02:11:12.83)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:09:12 (running for 02:11:17.85)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 20%|████████▌                                  | 2/10 [00:20<01:23, 10.41s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 14:09:17 (running for 02:11:22.87)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:09:22 (running for 02:11:27.89)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 30%|████████████▉                              | 3/10 [00:31<01:12, 10.41s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 14:09:27 (running for 02:11:32.92)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:09:32 (running for 02:11:37.93)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 40%|█████████████████▏                         | 4/10 [00:41<01:02, 10.43s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 14:09:37 (running for 02:11:42.96)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:09:43 (running for 02:11:47.98)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 50%|█████████████████████▌                     | 5/10 [00:52<00:52, 10.43s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 14:09:48 (running for 02:11:53.00)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:09:53 (running for 02:11:58.02)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 60%|█████████████████████████▊                 | 6/10 [01:02<00:41, 10.43s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 14:09:58 (running for 02:12:03.04)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:10:03 (running for 02:12:08.06)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 70%|██████████████████████████████             | 7/10 [01:13<00:31, 10.43s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 14:10:08 (running for 02:12:13.08)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:10:13 (running for 02:12:18.10)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 80%|██████████████████████████████████▍        | 8/10 [01:23<00:20, 10.44s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 14:10:18 (running for 02:12:23.12)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:10:23 (running for 02:12:28.14)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 90%|██████████████████████████████████████▋    | 9/10 [01:33<00:10, 10.45s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 14:10:28 (running for 02:12:33.16)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:10:33 (running for 02:12:38.17)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


[36m(RayTrainWorker pid=569510)[0m Rank 1 - model initialization done -> loss fn initialization
[36m(RayTrainWorker pid=569510)[0m Rank 1 - reading data
[36m(RayTrainWorker pid=569510)[0m Rank 1 - reading data done -> model initialization


100%|██████████████████████████████████████████| 10/10 [01:44<00:00, 10.44s/it]
 90%|██████████████████████████████████████▋    | 9/10 [01:34<00:10, 10.48s/it]
100%|██████████████████████████████████████████| 10/10 [01:44<00:00, 10.46s/it]


== Status ==
Current time: 2025-09-29 14:10:38 (running for 02:12:43.20)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:10:43 (running for 02:12:48.22)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


[36m(RayTrainWorker pid=569511)[0m [KSpaceLoss] torch.Size([8, 2001191, 2, 1]) torch.Size([8, 2001191, 2, 1]) None
[36m(RayTrainWorker pid=569511)[0m Rank 0 - model initialization done -> loss fn initialization
[36m(RayTrainWorker pid=569511)[0m [ImageSpaceLoss] torch.Size([1



== Status ==
Current time: 2025-09-29 14:10:48 (running for 02:12:53.24)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:10:53 (running for 02:12:58.26)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:10:58 (running for 02:13:03.29)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-5

[36m(RayTrainWorker pid=569511)[0m   return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


[36m(RayTrainWorker pid=569510)[0m Rank 1 - Index 0 - Gradient norm: 4.967
[36m(RayTrainWorker pid=569511)[0m Averaged gradient norm: 1.000
[36m(RayTrainWorker pid=569511)[0m Iteration: 22 - Elapsed time: 354 - Training loss: ['1.632'] - Validation loss: ['0.000']
== Status ==
Current time: 2025-09-29 14:14:34 (running for 02:16:39.22)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:14:39 (running for 02:16:44.24)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


[36m(RayTr

[36m(RayTrainWorker pid=569510)[0m   return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


== Status ==
Current time: 2025-09-29 14:14:44 (running for 02:16:49.26)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




  0%|                                                   | 0/10 [00:00<?, ?it/s]


== Status ==
Current time: 2025-09-29 14:14:49 (running for 02:16:54.29)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:14:54 (running for 02:16:59.31)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 10%|████▎                                      | 1/10 [00:10<01:33, 10.40s/it]
  0%|                                                   | 0/10 [00:00<?, ?it/s]


== Status ==
Current time: 2025-09-29 14:14:59 (running for 02:17:04.33)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:15:04 (running for 02:17:09.35)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 20%|████████▌                                  | 2/10 [00:20<01:23, 10.42s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 14:15:09 (running for 02:17:14.37)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:15:14 (running for 02:17:19.39)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 30%|████████████▉                              | 3/10 [00:31<01:12, 10.42s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 14:15:19 (running for 02:17:24.41)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:15:24 (running for 02:17:29.43)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 40%|█████████████████▏                         | 4/10 [00:41<01:02, 10.42s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 14:15:29 (running for 02:17:34.45)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:15:34 (running for 02:17:39.46)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:15:39 (running for 02:17:44.48)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-5

 50%|█████████████████████▌                     | 5/10 [00:52<00:52, 10.43s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 14:15:44 (running for 02:17:49.50)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:15:49 (running for 02:17:54.52)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 60%|█████████████████████████▊                 | 6/10 [01:02<00:41, 10.43s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 14:15:54 (running for 02:17:59.54)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:15:59 (running for 02:18:04.56)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 70%|██████████████████████████████             | 7/10 [01:12<00:31, 10.44s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 14:16:04 (running for 02:18:09.58)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:16:09 (running for 02:18:14.60)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 80%|██████████████████████████████████▍        | 8/10 [01:23<00:20, 10.44s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 14:16:14 (running for 02:18:19.62)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:16:19 (running for 02:18:24.64)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 90%|██████████████████████████████████████▋    | 9/10 [01:33<00:10, 10.44s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 14:16:24 (running for 02:18:29.65)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:16:29 (running for 02:18:34.67)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


[36m(RayTrainWorker pid=569510)[0m Rank 1 - model initialization done -> loss fn initialization
[36m(RayTrainWorker pid=569510)[0m Rank 1 - reading data
[36m(RayTrainWorker pid=569511)[0m Rank 0 - reading data done -> model initialization


100%|██████████████████████████████████████████| 10/10 [01:44<00:00, 10.43s/it]
 90%|██████████████████████████████████████▋    | 9/10 [01:34<00:10, 10.48s/it]
100%|██████████████████████████████████████████| 10/10 [01:44<00:00, 10.46s/it]


== Status ==
Current time: 2025-09-29 14:16:34 (running for 02:18:39.68)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


[36m(RayTrainWorker pid=569510)[0m [KSpaceLoss] torch.Size([8, 2001191, 2, 1]) torch.Size([8, 2001191, 2, 1]) None
[36m(RayTrainWorker pid=569511)[0m Rank 0 - model initialization done -> loss fn initialization
[36m(RayTrainWorker pid=569511)[0m [ImageSpaceLoss] torch.Size([156, 156, 156, 2, 1]) torch.Size([156, 156, 156, 2, 1]) None
[36m(RayTrainWorker pid=569510)[0m Rank 1 - Loss k-space: 1.606 - Loss image space: 0.037 - Loss Wavelet 0.000 - Loss Hankel 0.000 - Loss Casorati 0.000
[36m(RayTrainWorker pid=569510)[0m Rank 1 - loss fn initialization done -> compute backward pass
== Status ==
Current time: 2025-09-29 14:16:3



== Status ==
Current time: 2025-09-29 14:16:44 (running for 02:18:49.73)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:16:49 (running for 02:18:54.75)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:16:54 (running for 02:18:59.77)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-5

[36m(RayTrainWorker pid=569510)[0m   return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


[36m(RayTrainWorker pid=569510)[0m Rank 1 - Index 0 - Gradient norm: 5.002
[36m(RayTrainWorker pid=569511)[0m Averaged gradient norm: 1.000
[36m(RayTrainWorker pid=569511)[0m Iteration: 23 - Elapsed time: 352 - Training loss: ['1.625'] - Validation loss: ['0.000']
== Status ==
Current time: 2025-09-29 14:20:25 (running for 02:22:30.57)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:20:30 (running for 02:22:35.59)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


[36m(RayTr

[36m(RayTrainWorker pid=569511)[0m   return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


== Status ==
Current time: 2025-09-29 14:20:35 (running for 02:22:40.61)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:20:40 (running for 02:22:45.64)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




  0%|                                                   | 0/10 [00:00<?, ?it/s]


== Status ==
Current time: 2025-09-29 14:20:45 (running for 02:22:50.65)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:20:50 (running for 02:22:55.67)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 10%|████▎                                      | 1/10 [00:10<01:33, 10.42s/it]
  0%|                                                   | 0/10 [00:00<?, ?it/s]


== Status ==
Current time: 2025-09-29 14:20:55 (running for 02:23:00.69)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:21:00 (running for 02:23:05.71)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 20%|████████▌                                  | 2/10 [00:20<01:23, 10.42s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 14:21:05 (running for 02:23:10.73)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:21:10 (running for 02:23:15.75)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 30%|████████████▉                              | 3/10 [00:31<01:12, 10.41s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 14:21:15 (running for 02:23:20.77)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:21:20 (running for 02:23:25.79)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 40%|█████████████████▏                         | 4/10 [00:41<01:02, 10.42s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 14:21:25 (running for 02:23:30.81)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:21:30 (running for 02:23:35.83)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 50%|█████████████████████▌                     | 5/10 [00:52<00:52, 10.43s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 14:21:35 (running for 02:23:40.85)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:21:40 (running for 02:23:45.87)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 60%|█████████████████████████▊                 | 6/10 [01:02<00:41, 10.44s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 14:21:45 (running for 02:23:50.88)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:21:50 (running for 02:23:55.90)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 70%|██████████████████████████████             | 7/10 [01:13<00:31, 10.44s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 14:21:55 (running for 02:24:00.92)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:22:00 (running for 02:24:05.94)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 80%|██████████████████████████████████▍        | 8/10 [01:23<00:20, 10.43s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 14:22:06 (running for 02:24:10.96)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:22:11 (running for 02:24:15.98)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 90%|██████████████████████████████████████▋    | 9/10 [01:33<00:10, 10.44s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 14:22:16 (running for 02:24:21.00)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:22:21 (running for 02:24:26.03)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


[36m(RayTrainWorker pid=569510)[0m Rank 1 - model initialization done -> loss fn initialization
[36m(RayTrainWorker pid=569511)[0m Rank 0 - reading data
[36m(RayTrainWorker pid=569511)[0m Rank 0 - reading data done -> model initialization


100%|██████████████████████████████████████████| 10/10 [01:44<00:00, 10.43s/it]
 90%|██████████████████████████████████████▋    | 9/10 [01:34<00:10, 10.47s/it]
100%|██████████████████████████████████████████| 10/10 [01:44<00:00, 10.46s/it]


== Status ==
Current time: 2025-09-29 14:22:26 (running for 02:24:31.05)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:22:31 (running for 02:24:36.07)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


[36m(RayTrainWorker pid=569510)[0m [KSpaceLoss] torch.Size([8, 2001191, 2, 1]) torch.Size([8, 2001191, 2, 1]) None
[36m(RayTrainWorker pid=569511)[0m Rank 0 - model initialization done -> loss fn initialization
[36m(RayTrainWorker pid=569510)[0m [ImageSpaceLoss] torch.Size([1



== Status ==
Current time: 2025-09-29 14:22:36 (running for 02:24:41.09)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:22:41 (running for 02:24:46.11)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:22:46 (running for 02:24:51.13)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-5

[36m(RayTrainWorker pid=569510)[0m   return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


[36m(RayTrainWorker pid=569510)[0m Rank 1 - Index 0 - Gradient norm: 4.886
[36m(RayTrainWorker pid=569511)[0m Averaged gradient norm: 1.000
[36m(RayTrainWorker pid=569511)[0m Creating tagged checkpoint ...
[36m(RayTrainWorker pid=569511)[0m 2025-09-29 14:26:18 Schedule checkpoint save with tag: _epoch_1 ...
[36m(RayTrainWorker pid=569511)[0m 2025-09-29 14:26:18 Saved checkpoint to buffer 0.0 seconds
[36m(RayTrainWorker pid=569511)[0m Iteration: 24 - Elapsed time: 354 - Training loss: ['1.622'] - Validation loss: ['0.000']
[36m(RayTrainWorker pid=569511)[0m 2025-09-29 14:26:18 Saved buffer to filesystem in 0.0 seconds
[36m(RayTrainWorker pid=569511)[0m 2025-09-29 14:26:18 Completed saving checkpoint.
== Status ==
Current time: 2025-09-29 14:26:22 (running for 02:28:27.00)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_

[36m(RayTrainWorker pid=569511)[0m   return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


== Status ==
Current time: 2025-09-29 14:26:27 (running for 02:28:32.03)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:26:32 (running for 02:28:37.05)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:26:37 (running for 02:28:42.07)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-5

  0%|                                                   | 0/10 [00:00<?, ?it/s]


== Status ==
Current time: 2025-09-29 14:26:42 (running for 02:28:47.10)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:26:47 (running for 02:28:52.11)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 10%|████▎                                      | 1/10 [00:10<01:33, 10.39s/it]
  0%|                                                   | 0/10 [00:00<?, ?it/s]


== Status ==
Current time: 2025-09-29 14:26:52 (running for 02:28:57.14)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:26:57 (running for 02:29:02.15)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 20%|████████▌                                  | 2/10 [00:20<01:23, 10.41s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 14:27:02 (running for 02:29:07.18)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:27:07 (running for 02:29:12.19)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 30%|████████████▉                              | 3/10 [00:31<01:12, 10.40s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 14:27:12 (running for 02:29:17.22)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:27:17 (running for 02:29:22.24)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 40%|█████████████████▏                         | 4/10 [00:41<01:02, 10.40s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 14:27:22 (running for 02:29:27.26)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:27:27 (running for 02:29:32.28)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 50%|█████████████████████▌                     | 5/10 [00:52<00:52, 10.42s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 14:27:32 (running for 02:29:37.30)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:27:37 (running for 02:29:42.31)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 60%|█████████████████████████▊                 | 6/10 [01:02<00:41, 10.42s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 14:27:42 (running for 02:29:47.34)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:27:47 (running for 02:29:52.35)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 70%|██████████████████████████████             | 7/10 [01:12<00:31, 10.42s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 14:27:52 (running for 02:29:57.38)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:27:57 (running for 02:30:02.39)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 80%|██████████████████████████████████▍        | 8/10 [01:23<00:20, 10.43s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 14:28:02 (running for 02:30:07.41)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:28:07 (running for 02:30:12.43)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 90%|██████████████████████████████████████▋    | 9/10 [01:33<00:10, 10.43s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 14:28:12 (running for 02:30:17.45)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:28:17 (running for 02:30:22.47)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


[36m(RayTrainWorker pid=569510)[0m Rank 1 - model initialization done -> loss fn initialization
[36m(RayTrainWorker pid=569511)[0m Rank 0 - reading data
[36m(RayTrainWorker pid=569511)[0m Rank 0 - reading data done -> model initialization
== Status ==
Current time: 2025-09-29

100%|██████████████████████████████████████████| 10/10 [01:44<00:00, 10.43s/it]
 90%|██████████████████████████████████████▋    | 9/10 [01:34<00:10, 10.48s/it]
100%|██████████████████████████████████████████| 10/10 [01:44<00:00, 10.45s/it]


== Status ==
Current time: 2025-09-29 14:28:27 (running for 02:30:32.51)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


[36m(RayTrainWorker pid=569511)[0m [KSpaceLoss] torch.Size([8, 2001191, 2, 1]) torch.Size([8, 2001191, 2, 1]) None
[36m(RayTrainWorker pid=569511)[0m Rank 0 - model initialization done -> loss fn initialization
[36m(RayTrainWorker pid=569511)[0m [ImageSpaceLoss] torch.Size([156, 156, 156, 2, 1]) torch.Size([156, 156, 156, 2, 1]) None
[36m(RayTrainWorker pid=569510)[0m Rank 1 - Loss k-space: 1.628 - Loss image space: 0.036 - Loss Wavelet 0.000 - Loss Hankel 0.000 - Loss Casorati 0.000
[36m(RayTrainWorker pid=569510)[0m Rank 1 - loss fn initialization done -> compute backward pass




== Status ==
Current time: 2025-09-29 14:28:32 (running for 02:30:37.54)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:28:37 (running for 02:30:42.56)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:28:42 (running for 02:30:47.58)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-5

[36m(RayTrainWorker pid=569510)[0m   return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


[36m(RayTrainWorker pid=569510)[0m Rank 1 - Index 0 - Gradient norm: 4.827
[36m(RayTrainWorker pid=569511)[0m Averaged gradient norm: 1.000
[36m(RayTrainWorker pid=569511)[0m Iteration: 25 - Elapsed time: 357 - Training loss: ['1.639'] - Validation loss: ['0.000']
== Status ==
Current time: 2025-09-29 14:32:18 (running for 02:34:23.44)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:32:23 (running for 02:34:28.47)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


[36m(RayTr

[36m(RayTrainWorker pid=569511)[0m   return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


== Status ==
Current time: 2025-09-29 14:32:28 (running for 02:34:33.49)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




  0%|                                                   | 0/10 [00:00<?, ?it/s]


== Status ==
Current time: 2025-09-29 14:32:33 (running for 02:34:38.51)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:32:38 (running for 02:34:43.53)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 10%|████▎                                      | 1/10 [00:10<01:33, 10.39s/it]
  0%|                                                   | 0/10 [00:00<?, ?it/s]


== Status ==
Current time: 2025-09-29 14:32:43 (running for 02:34:48.55)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:32:48 (running for 02:34:53.57)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 20%|████████▌                                  | 2/10 [00:20<01:23, 10.39s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 14:32:53 (running for 02:34:58.60)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:32:58 (running for 02:35:03.62)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 30%|████████████▉                              | 3/10 [00:31<01:12, 10.40s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 14:33:03 (running for 02:35:08.64)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:33:08 (running for 02:35:13.66)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 40%|█████████████████▏                         | 4/10 [00:41<01:02, 10.41s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 14:33:13 (running for 02:35:18.68)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:33:18 (running for 02:35:23.70)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:33:23 (running for 02:35:28.72)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-5

 50%|█████████████████████▌                     | 5/10 [00:52<00:52, 10.43s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 14:33:28 (running for 02:35:33.73)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:33:33 (running for 02:35:38.75)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 60%|█████████████████████████▊                 | 6/10 [01:02<00:41, 10.43s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 14:33:38 (running for 02:35:43.77)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:33:43 (running for 02:35:48.79)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 70%|██████████████████████████████             | 7/10 [01:12<00:31, 10.43s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 14:33:48 (running for 02:35:53.81)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:33:53 (running for 02:35:58.83)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 80%|██████████████████████████████████▍        | 8/10 [01:23<00:20, 10.44s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 14:33:58 (running for 02:36:03.85)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:34:03 (running for 02:36:08.87)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 90%|██████████████████████████████████████▋    | 9/10 [01:33<00:10, 10.44s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 14:34:08 (running for 02:36:13.89)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:34:13 (running for 02:36:18.91)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


[36m(RayTrainWorker pid=569510)[0m Rank 1 - model initialization done -> loss fn initialization
[36m(RayTrainWorker pid=569511)[0m Rank 0 - reading data
[36m(RayTrainWorker pid=569511)[0m Rank 0 - reading data done -> model initialization


100%|██████████████████████████████████████████| 10/10 [01:44<00:00, 10.43s/it]
 90%|██████████████████████████████████████▋    | 9/10 [01:34<00:10, 10.48s/it]
100%|██████████████████████████████████████████| 10/10 [01:44<00:00, 10.45s/it]


== Status ==
Current time: 2025-09-29 14:34:18 (running for 02:36:23.93)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


[36m(RayTrainWorker pid=569511)[0m [KSpaceLoss] torch.Size([8, 2001191, 2, 1]) torch.Size([8, 2001191, 2, 1]) None
[36m(RayTrainWorker pid=569511)[0m Rank 0 - model initialization done -> loss fn initialization
[36m(RayTrainWorker pid=569511)[0m [ImageSpaceLoss] torch.Size([156, 156, 156, 2, 1]) torch.Size([156, 156, 156, 2, 1]) None
== Status ==
Current time: 2025-09-29 14:34:23 (running for 02:36:28.95)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_e



== Status ==
Current time: 2025-09-29 14:34:29 (running for 02:36:33.98)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:34:34 (running for 02:36:39.00)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:34:39 (running for 02:36:44.02)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-5

[36m(RayTrainWorker pid=569510)[0m   return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


== Status ==
Current time: 2025-09-29 14:38:09 (running for 02:40:14.80)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


[36m(RayTrainWorker pid=569510)[0m Rank 1 - Index 0 - Gradient norm: 4.762
[36m(RayTrainWorker pid=569511)[0m Averaged gradient norm: 1.000
[36m(RayTrainWorker pid=569511)[0m Iteration: 26 - Elapsed time: 355 - Training loss: ['1.614'] - Validation loss: ['0.000']
== Status ==
Current time: 2025-09-29 14:38:14 (running for 02:40:19.82)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


[36m(RayTr

[36m(RayTrainWorker pid=569511)[0m   return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


== Status ==
Current time: 2025-09-29 14:38:24 (running for 02:40:29.87)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




  0%|                                                   | 0/10 [00:00<?, ?it/s]


== Status ==
Current time: 2025-09-29 14:38:29 (running for 02:40:34.89)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:38:34 (running for 02:40:39.91)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 10%|████▎                                      | 1/10 [00:10<01:33, 10.40s/it]
  0%|                                                   | 0/10 [00:00<?, ?it/s]


== Status ==
Current time: 2025-09-29 14:38:39 (running for 02:40:44.93)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:38:44 (running for 02:40:49.95)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 20%|████████▌                                  | 2/10 [00:20<01:23, 10.41s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 14:38:50 (running for 02:40:54.96)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:38:55 (running for 02:40:59.98)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 30%|████████████▉                              | 3/10 [00:31<01:12, 10.42s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 14:39:00 (running for 02:41:05.00)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:39:05 (running for 02:41:10.02)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:39:10 (running for 02:41:15.04)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-5

 40%|█████████████████▏                         | 4/10 [00:41<01:02, 10.42s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 14:39:15 (running for 02:41:20.06)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:39:20 (running for 02:41:25.08)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 50%|█████████████████████▌                     | 5/10 [00:52<00:52, 10.42s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 14:39:25 (running for 02:41:30.10)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:39:30 (running for 02:41:35.12)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 60%|█████████████████████████▊                 | 6/10 [01:02<00:41, 10.43s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 14:39:35 (running for 02:41:40.14)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:39:40 (running for 02:41:45.16)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 70%|██████████████████████████████             | 7/10 [01:12<00:31, 10.44s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 14:39:45 (running for 02:41:50.18)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:39:50 (running for 02:41:55.20)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 80%|██████████████████████████████████▍        | 8/10 [01:23<00:20, 10.43s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 14:39:55 (running for 02:42:00.22)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:40:00 (running for 02:42:05.24)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 90%|██████████████████████████████████████▋    | 9/10 [01:33<00:10, 10.43s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 14:40:05 (running for 02:42:10.26)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:40:10 (running for 02:42:15.27)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


[36m(RayTrainWorker pid=569510)[0m Rank 1 - model initialization done -> loss fn initialization
[36m(RayTrainWorker pid=569510)[0m Rank 1 - reading data
[36m(RayTrainWorker pid=569511)[0m Rank 0 - reading data done -> model initialization


100%|██████████████████████████████████████████| 10/10 [01:44<00:00, 10.43s/it]
 90%|██████████████████████████████████████▋    | 9/10 [01:34<00:10, 10.48s/it]
100%|██████████████████████████████████████████| 10/10 [01:44<00:00, 10.45s/it]


== Status ==
Current time: 2025-09-29 14:40:15 (running for 02:42:20.29)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


[36m(RayTrainWorker pid=569510)[0m [KSpaceLoss] torch.Size([8, 2001191, 2, 1]) torch.Size([8, 2001191, 2, 1]) None
[36m(RayTrainWorker pid=569511)[0m Rank 0 - model initialization done -> loss fn initialization
[36m(RayTrainWorker pid=569510)[0m [ImageSpaceLoss] torch.Size([156, 156, 156, 2, 1]) torch.Size([156, 156, 156, 2, 1]) None
[36m(RayTrainWorker pid=569510)[0m Rank 1 - Loss k-space: 1.614 - Loss image space: 0.036 - Loss Wavelet 0.000 - Loss Hankel 0.000 - Loss Casorati 0.000
[36m(RayTrainWorker pid=569510)[0m Rank 1 - loss fn initialization done -> compute backward pass
== Status ==
Current time: 2025-09-29 14:40:2



== Status ==
Current time: 2025-09-29 14:40:25 (running for 02:42:30.33)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:40:30 (running for 02:42:35.35)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:40:35 (running for 02:42:40.37)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-5

[36m(RayTrainWorker pid=569510)[0m   return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


[36m(RayTrainWorker pid=569510)[0m Rank 1 - Index 0 - Gradient norm: 4.820
[36m(RayTrainWorker pid=569511)[0m Averaged gradient norm: 1.000
[36m(RayTrainWorker pid=569511)[0m Iteration: 27 - Elapsed time: 354 - Training loss: ['1.622'] - Validation loss: ['0.000']
== Status ==
Current time: 2025-09-29 14:44:06 (running for 02:46:11.16)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:44:11 (running for 02:46:16.18)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


[36m(RayTr

[36m(RayTrainWorker pid=569511)[0m   return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


== Status ==
Current time: 2025-09-29 14:44:16 (running for 02:46:21.20)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:44:21 (running for 02:46:26.22)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




  0%|                                                   | 0/10 [00:00<?, ?it/s]


== Status ==
Current time: 2025-09-29 14:44:26 (running for 02:46:31.24)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:44:31 (running for 02:46:36.26)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 10%|████▎                                      | 1/10 [00:10<01:33, 10.43s/it]
  0%|                                                   | 0/10 [00:00<?, ?it/s]


== Status ==
Current time: 2025-09-29 14:44:36 (running for 02:46:41.47)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:44:41 (running for 02:46:46.48)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 20%|████████▌                                  | 2/10 [00:20<01:23, 10.42s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 14:44:46 (running for 02:46:51.51)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:44:51 (running for 02:46:56.52)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 30%|████████████▉                              | 3/10 [00:31<01:12, 10.42s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 14:44:56 (running for 02:47:01.54)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:45:01 (running for 02:47:06.56)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 40%|█████████████████▏                         | 4/10 [00:41<01:02, 10.41s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 14:45:06 (running for 02:47:11.58)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:45:11 (running for 02:47:16.60)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 50%|█████████████████████▌                     | 5/10 [00:52<00:52, 10.41s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 14:45:16 (running for 02:47:21.62)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:45:21 (running for 02:47:26.64)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 60%|█████████████████████████▊                 | 6/10 [01:02<00:41, 10.43s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 14:45:26 (running for 02:47:31.66)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:45:31 (running for 02:47:36.68)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 70%|██████████████████████████████             | 7/10 [01:13<00:31, 10.43s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 14:45:36 (running for 02:47:41.70)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:45:41 (running for 02:47:46.72)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 80%|██████████████████████████████████▍        | 8/10 [01:23<00:20, 10.43s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 14:45:46 (running for 02:47:51.74)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:45:51 (running for 02:47:56.75)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:45:56 (running for 02:48:01.78)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-5

 90%|██████████████████████████████████████▋    | 9/10 [01:33<00:10, 10.43s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 14:46:01 (running for 02:48:06.79)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:46:06 (running for 02:48:11.82)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


[36m(RayTrainWorker pid=569510)[0m Rank 1 - model initialization done -> loss fn initialization
[36m(RayTrainWorker pid=569510)[0m Rank 1 - reading data
[36m(RayTrainWorker pid=569510)[0m Rank 1 - reading data done -> model initialization


100%|██████████████████████████████████████████| 10/10 [01:44<00:00, 10.43s/it]
 90%|██████████████████████████████████████▋    | 9/10 [01:34<00:10, 10.47s/it]
100%|██████████████████████████████████████████| 10/10 [01:44<00:00, 10.45s/it]


== Status ==
Current time: 2025-09-29 14:46:11 (running for 02:48:16.84)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


[36m(RayTrainWorker pid=569510)[0m [KSpaceLoss] torch.Size([8, 2001191, 2, 1]) torch.Size([8, 2001191, 2, 1]) None
[36m(RayTrainWorker pid=569511)[0m Rank 0 - model initialization done -> loss fn initialization
[36m(RayTrainWorker pid=569510)[0m [ImageSpaceLoss] torch.Size([156, 156, 156, 2, 1]) torch.Size([156, 156, 156, 2, 1]) None
[36m(RayTrainWorker pid=569510)[0m Rank 1 - Loss k-space: 1.709 - Loss image space: 0.034 - Loss Wavelet 0.000 - Loss Hankel 0.000 - Loss Casorati 0.000
[36m(RayTrainWorker pid=569510)[0m Rank 1 - loss fn initialization done -> compute backward pass




== Status ==
Current time: 2025-09-29 14:46:16 (running for 02:48:21.86)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:46:21 (running for 02:48:26.88)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:46:26 (running for 02:48:31.90)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-5

[36m(RayTrainWorker pid=569511)[0m   return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


== Status ==
Current time: 2025-09-29 14:50:02 (running for 02:52:07.72)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:50:07 (running for 02:52:12.74)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


[36m(RayTrainWorker pid=569511)[0m Rank 0 - reading data
[36m(RayTrainWorker pid=569510)[0m Rank 1 - compute backward pass done -> compute accumulator
[36m(RayTrainWorker pid=569511)[0m Rank 0 - Index 0 - Gradient norm: 5.212
[36m(RayTrainWorker pid=569510)[0m Rank 1 - read

[36m(RayTrainWorker pid=569510)[0m   return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


== Status ==
Current time: 2025-09-29 14:50:12 (running for 02:52:17.76)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:50:17 (running for 02:52:22.79)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




  0%|                                                   | 0/10 [00:00<?, ?it/s]


== Status ==
Current time: 2025-09-29 14:50:22 (running for 02:52:27.81)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:50:27 (running for 02:52:32.83)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 10%|████▎                                      | 1/10 [00:10<01:33, 10.39s/it]
  0%|                                                   | 0/10 [00:00<?, ?it/s]


== Status ==
Current time: 2025-09-29 14:50:32 (running for 02:52:37.85)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:50:37 (running for 02:52:42.87)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 20%|████████▌                                  | 2/10 [00:20<01:23, 10.42s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 14:50:42 (running for 02:52:47.89)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:50:47 (running for 02:52:52.91)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 30%|████████████▉                              | 3/10 [00:31<01:12, 10.43s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 14:50:52 (running for 02:52:57.93)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:50:57 (running for 02:53:02.95)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 40%|█████████████████▏                         | 4/10 [00:41<01:02, 10.43s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 14:51:03 (running for 02:53:07.96)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:51:08 (running for 02:53:12.99)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 50%|█████████████████████▌                     | 5/10 [00:52<00:52, 10.42s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 14:51:13 (running for 02:53:18.00)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:51:18 (running for 02:53:23.03)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 60%|█████████████████████████▊                 | 6/10 [01:02<00:41, 10.43s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 14:51:23 (running for 02:53:28.04)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:51:28 (running for 02:53:33.07)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 70%|██████████████████████████████             | 7/10 [01:12<00:31, 10.43s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 14:51:33 (running for 02:53:38.08)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:51:38 (running for 02:53:43.10)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:51:43 (running for 02:53:48.12)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-5

 80%|██████████████████████████████████▍        | 8/10 [01:23<00:20, 10.43s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 14:51:48 (running for 02:53:53.14)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:51:53 (running for 02:53:58.16)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 90%|██████████████████████████████████████▋    | 9/10 [01:33<00:10, 10.43s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 14:51:58 (running for 02:54:03.18)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:52:03 (running for 02:54:08.20)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


[36m(RayTrainWorker pid=569510)[0m Rank 1 - model initialization done -> loss fn initialization
[36m(RayTrainWorker pid=569510)[0m Rank 1 - reading data
[36m(RayTrainWorker pid=569511)[0m Rank 0 - reading data done -> model initialization


100%|██████████████████████████████████████████| 10/10 [01:44<00:00, 10.43s/it]
 90%|██████████████████████████████████████▋    | 9/10 [01:34<00:10, 10.47s/it]
100%|██████████████████████████████████████████| 10/10 [01:44<00:00, 10.45s/it]


== Status ==
Current time: 2025-09-29 14:52:08 (running for 02:54:13.22)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


[36m(RayTrainWorker pid=569510)[0m [KSpaceLoss] torch.Size([8, 2001191, 2, 1]) torch.Size([8, 2001191, 2, 1]) None
[36m(RayTrainWorker pid=569511)[0m Rank 0 - model initialization done -> loss fn initialization
[36m(RayTrainWorker pid=569510)[0m [ImageSpaceLoss] torch.Size([156, 156, 156, 2, 1]) torch.Size([156, 156, 156, 2, 1]) None
[36m(RayTrainWorker pid=569510)[0m Rank 1 - Loss k-space: 1.581 - Loss image space: 0.035 - Loss Wavelet 0.000 - Loss Hankel 0.000 - Loss Casorati 0.000
[36m(RayTrainWorker pid=569510)[0m Rank 1 - loss fn initialization done -> compute backward pass




== Status ==
Current time: 2025-09-29 14:52:13 (running for 02:54:18.24)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:52:18 (running for 02:54:23.26)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:52:23 (running for 02:54:28.28)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-5

[36m(RayTrainWorker pid=569510)[0m   return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


[36m(RayTrainWorker pid=569510)[0m Rank 1 - Index 0 - Gradient norm: 4.717
[36m(RayTrainWorker pid=569511)[0m Averaged gradient norm: 1.000
[36m(RayTrainWorker pid=569511)[0m Creating untagged checkpoint ...
[36m(RayTrainWorker pid=569511)[0m 2025-09-29 14:55:56 Schedule checkpoint save with tag:  ...
[36m(RayTrainWorker pid=569511)[0m 2025-09-29 14:55:56 Saved checkpoint to buffer 0.0 seconds
[36m(RayTrainWorker pid=569511)[0m Iteration: 29 - Elapsed time: 354 - Training loss: ['1.609'] - Validation loss: ['0.000']
[36m(RayTrainWorker pid=569511)[0m 2025-09-29 14:55:56 Saved buffer to filesystem in 0.1 seconds
[36m(RayTrainWorker pid=569511)[0m 2025-09-29 14:55:56 Completed saving checkpoint.
== Status ==
Current time: 2025-09-29 14:55:59 (running for 02:58:04.14)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-

[36m(RayTrainWorker pid=569511)[0m   return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


== Status ==
Current time: 2025-09-29 14:56:09 (running for 02:58:14.18)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




  0%|                                                   | 0/10 [00:00<?, ?it/s]


== Status ==
Current time: 2025-09-29 14:56:14 (running for 02:58:19.20)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:56:19 (running for 02:58:24.23)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 10%|████▎                                      | 1/10 [00:10<01:33, 10.41s/it]
  0%|                                                   | 0/10 [00:00<?, ?it/s]


== Status ==
Current time: 2025-09-29 14:56:24 (running for 02:58:29.24)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:56:29 (running for 02:58:34.26)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:56:34 (running for 02:58:39.28)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-5

 20%|████████▌                                  | 2/10 [00:20<01:23, 10.40s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 14:56:39 (running for 02:58:44.30)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:56:44 (running for 02:58:49.32)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 30%|████████████▉                              | 3/10 [00:31<01:12, 10.40s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 14:56:49 (running for 02:58:54.34)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:56:54 (running for 02:58:59.36)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 40%|█████████████████▏                         | 4/10 [00:41<01:02, 10.41s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 14:56:59 (running for 02:59:04.38)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:57:04 (running for 02:59:09.40)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 50%|█████████████████████▌                     | 5/10 [00:52<00:52, 10.41s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 14:57:09 (running for 02:59:14.42)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:57:14 (running for 02:59:19.44)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 60%|█████████████████████████▊                 | 6/10 [01:02<00:41, 10.43s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 14:57:19 (running for 02:59:24.46)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:57:24 (running for 02:59:29.47)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 70%|██████████████████████████████             | 7/10 [01:12<00:31, 10.43s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 14:57:29 (running for 02:59:34.49)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:57:34 (running for 02:59:39.51)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 80%|██████████████████████████████████▍        | 8/10 [01:23<00:20, 10.44s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 14:57:39 (running for 02:59:44.53)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:57:44 (running for 02:59:49.55)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 90%|██████████████████████████████████████▋    | 9/10 [01:33<00:10, 10.45s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 14:57:49 (running for 02:59:54.61)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:57:54 (running for 02:59:59.63)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


[36m(RayTrainWorker pid=569510)[0m Rank 1 - model initialization done -> loss fn initialization
[36m(RayTrainWorker pid=569511)[0m Rank 0 - reading data
[36m(RayTrainWorker pid=569511)[0m Rank 0 - reading data done -> model initialization


100%|██████████████████████████████████████████| 10/10 [01:44<00:00, 10.45s/it]
 90%|██████████████████████████████████████▋    | 9/10 [01:33<00:10, 10.47s/it]
100%|██████████████████████████████████████████| 10/10 [01:44<00:00, 10.46s/it]


== Status ==
Current time: 2025-09-29 14:57:59 (running for 03:00:04.65)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:58:04 (running for 03:00:09.68)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


[36m(RayTrainWorker pid=569510)[0m [KSpaceLoss] torch.Size([8, 2001191, 2, 1]) torch.Size([8, 2001191, 2, 1]) None
[36m(RayTrainWorker pid=569511)[0m Rank 0 - model initialization done -> loss fn initialization
[36m(RayTrainWorker pid=569510)[0m [ImageSpaceLoss] torch.Size([1



== Status ==
Current time: 2025-09-29 14:58:09 (running for 03:00:14.70)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:58:14 (running for 03:00:19.72)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 14:58:19 (running for 03:00:24.74)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-5

[36m(RayTrainWorker pid=569510)[0m   return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


== Status ==
Current time: 2025-09-29 15:01:50 (running for 03:03:55.55)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


[36m(RayTrainWorker pid=569510)[0m Rank 1 - Index 0 - Gradient norm: 4.665
[36m(RayTrainWorker pid=569511)[0m Averaged gradient norm: 1.000
[36m(RayTrainWorker pid=569511)[0m Iteration: 30 - Elapsed time: 354 - Training loss: ['1.631'] - Validation loss: ['0.000']
== Status ==
Current time: 2025-09-29 15:01:55 (running for 03:04:00.57)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


[36m(RayTr

[36m(RayTrainWorker pid=569511)[0m   return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


== Status ==
Current time: 2025-09-29 15:02:00 (running for 03:04:05.60)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:02:05 (running for 03:04:10.62)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




  0%|                                                   | 0/10 [00:00<?, ?it/s]


== Status ==
Current time: 2025-09-29 15:02:10 (running for 03:04:15.64)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:02:15 (running for 03:04:20.66)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 10%|████▎                                      | 1/10 [00:10<01:33, 10.40s/it]
  0%|                                                   | 0/10 [00:00<?, ?it/s]


== Status ==
Current time: 2025-09-29 15:02:20 (running for 03:04:25.68)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:02:25 (running for 03:04:30.69)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 20%|████████▌                                  | 2/10 [00:20<01:23, 10.41s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 15:02:30 (running for 03:04:35.71)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:02:35 (running for 03:04:40.73)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 30%|████████████▉                              | 3/10 [00:31<01:12, 10.42s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 15:02:40 (running for 03:04:45.75)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:02:45 (running for 03:04:50.77)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 40%|█████████████████▏                         | 4/10 [00:41<01:02, 10.42s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 15:02:50 (running for 03:04:55.79)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:02:55 (running for 03:05:00.81)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 50%|█████████████████████▌                     | 5/10 [00:52<00:52, 10.42s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 15:03:00 (running for 03:05:05.83)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:03:05 (running for 03:05:10.85)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 60%|█████████████████████████▊                 | 6/10 [01:02<00:41, 10.43s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 15:03:10 (running for 03:05:15.87)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:03:15 (running for 03:05:20.89)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 70%|██████████████████████████████             | 7/10 [01:12<00:31, 10.43s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 15:03:20 (running for 03:05:25.91)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:03:25 (running for 03:05:30.92)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:03:30 (running for 03:05:35.94)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-5

 80%|██████████████████████████████████▍        | 8/10 [01:23<00:20, 10.44s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 15:03:35 (running for 03:05:40.96)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:03:41 (running for 03:05:45.98)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 90%|██████████████████████████████████████▋    | 9/10 [01:33<00:10, 10.43s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 15:03:46 (running for 03:05:51.00)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:03:51 (running for 03:05:56.02)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


[36m(RayTrainWorker pid=569510)[0m Rank 1 - model initialization done -> loss fn initialization
[36m(RayTrainWorker pid=569511)[0m Rank 0 - reading data
[36m(RayTrainWorker pid=569511)[0m Rank 0 - reading data done -> model initialization


100%|██████████████████████████████████████████| 10/10 [01:44<00:00, 10.43s/it]
 90%|██████████████████████████████████████▋    | 9/10 [01:34<00:10, 10.48s/it]
100%|██████████████████████████████████████████| 10/10 [01:44<00:00, 10.46s/it]


== Status ==
Current time: 2025-09-29 15:03:56 (running for 03:06:01.04)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


[36m(RayTrainWorker pid=569510)[0m [KSpaceLoss] torch.Size([8, 2001191, 2, 1]) torch.Size([8, 2001191, 2, 1]) None
[36m(RayTrainWorker pid=569511)[0m Rank 0 - model initialization done -> loss fn initialization
[36m(RayTrainWorker pid=569510)[0m [ImageSpaceLoss] torch.Size([156, 156, 156, 2, 1]) torch.Size([156, 156, 156, 2, 1]) None
[36m(RayTrainWorker pid=569510)[0m Rank 1 - Loss k-space: 1.623 - Loss image space: 0.035 - Loss Wavelet 0.000 - Loss Hankel 0.000 - Loss Casorati 0.000
[36m(RayTrainWorker pid=569510)[0m Rank 1 - loss fn initialization done -> compute backward pass




== Status ==
Current time: 2025-09-29 15:04:01 (running for 03:06:06.06)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:04:06 (running for 03:06:11.08)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:04:11 (running for 03:06:16.10)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-5

[36m(RayTrainWorker pid=569510)[0m   return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


[36m(RayTrainWorker pid=569510)[0m Rank 1 - Index 0 - Gradient norm: 4.759
[36m(RayTrainWorker pid=569511)[0m Averaged gradient norm: 1.000
[36m(RayTrainWorker pid=569511)[0m Iteration: 31 - Elapsed time: 353 - Training loss: ['1.631'] - Validation loss: ['0.000']
== Status ==
Current time: 2025-09-29 15:07:46 (running for 03:09:51.91)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:07:51 (running for 03:09:56.93)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


[36m(RayTr

[36m(RayTrainWorker pid=569511)[0m   return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


== Status ==
Current time: 2025-09-29 15:07:56 (running for 03:10:01.95)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




  0%|                                                   | 0/10 [00:00<?, ?it/s]


== Status ==
Current time: 2025-09-29 15:08:02 (running for 03:10:06.97)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:08:07 (running for 03:10:11.99)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 10%|████▎                                      | 1/10 [00:10<01:33, 10.41s/it]
  0%|                                                   | 0/10 [00:00<?, ?it/s]


== Status ==
Current time: 2025-09-29 15:08:12 (running for 03:10:17.01)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:08:17 (running for 03:10:22.03)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 20%|████████▌                                  | 2/10 [00:20<01:23, 10.42s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 15:08:22 (running for 03:10:27.05)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:08:27 (running for 03:10:32.06)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:08:32 (running for 03:10:37.09)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-5

 30%|████████████▉                              | 3/10 [00:31<01:12, 10.40s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 15:08:37 (running for 03:10:42.10)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:08:42 (running for 03:10:47.12)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 40%|█████████████████▏                         | 4/10 [00:41<01:02, 10.41s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 15:08:47 (running for 03:10:52.14)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:08:52 (running for 03:10:57.16)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 50%|█████████████████████▌                     | 5/10 [00:52<00:52, 10.41s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 15:08:57 (running for 03:11:02.18)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:09:02 (running for 03:11:07.20)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 60%|█████████████████████████▊                 | 6/10 [01:02<00:41, 10.44s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 15:09:07 (running for 03:11:12.22)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:09:12 (running for 03:11:17.24)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 70%|██████████████████████████████             | 7/10 [01:13<00:31, 10.43s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 15:09:17 (running for 03:11:22.26)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:09:22 (running for 03:11:27.28)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 80%|██████████████████████████████████▍        | 8/10 [01:23<00:20, 10.43s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 15:09:27 (running for 03:11:32.30)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:09:32 (running for 03:11:37.32)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 90%|██████████████████████████████████████▋    | 9/10 [01:33<00:10, 10.44s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 15:09:37 (running for 03:11:42.34)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:09:42 (running for 03:11:47.36)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


[36m(RayTrainWorker pid=569510)[0m Rank 1 - model initialization done -> loss fn initialization
[36m(RayTrainWorker pid=569511)[0m Rank 0 - reading data
[36m(RayTrainWorker pid=569511)[0m Rank 0 - reading data done -> model initialization


100%|██████████████████████████████████████████| 10/10 [01:44<00:00, 10.43s/it]
 90%|██████████████████████████████████████▋    | 9/10 [01:33<00:10, 10.47s/it]
100%|██████████████████████████████████████████| 10/10 [01:44<00:00, 10.45s/it]


== Status ==
Current time: 2025-09-29 15:09:47 (running for 03:11:52.37)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


[36m(RayTrainWorker pid=569510)[0m [KSpaceLoss] torch.Size([8, 2001191, 2, 1]) torch.Size([8, 2001191, 2, 1]) None
[36m(RayTrainWorker pid=569511)[0m Rank 0 - model initialization done -> loss fn initialization
[36m(RayTrainWorker pid=569510)[0m [ImageSpaceLoss] torch.Size([156, 156, 156, 2, 1]) torch.Size([156, 156, 156, 2, 1]) None
== Status ==
Current time: 2025-09-29 15:09:52 (running for 03:11:57.40)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_e



== Status ==
Current time: 2025-09-29 15:09:57 (running for 03:12:02.42)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:10:02 (running for 03:12:07.44)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:10:07 (running for 03:12:12.46)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-5

[36m(RayTrainWorker pid=569510)[0m   return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


[36m(RayTrainWorker pid=569510)[0m Rank 1 - Index 0 - Gradient norm: 4.583
[36m(RayTrainWorker pid=569511)[0m Averaged gradient norm: 1.000
[36m(RayTrainWorker pid=569511)[0m Iteration: 32 - Elapsed time: 354 - Training loss: ['1.626'] - Validation loss: ['0.000']
== Status ==
Current time: 2025-09-29 15:13:38 (running for 03:15:43.25)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:13:43 (running for 03:15:48.27)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


[36m(RayTr

[36m(RayTrainWorker pid=569511)[0m   return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


== Status ==
Current time: 2025-09-29 15:13:48 (running for 03:15:53.29)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:13:53 (running for 03:15:58.32)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




  0%|                                                   | 0/10 [00:00<?, ?it/s]


== Status ==
Current time: 2025-09-29 15:13:58 (running for 03:16:03.34)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:14:03 (running for 03:16:08.36)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 10%|████▎                                      | 1/10 [00:10<01:33, 10.39s/it]
  0%|                                                   | 0/10 [00:00<?, ?it/s]


== Status ==
Current time: 2025-09-29 15:14:08 (running for 03:16:13.37)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:14:13 (running for 03:16:18.39)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 20%|████████▌                                  | 2/10 [00:20<01:23, 10.41s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 15:14:18 (running for 03:16:23.41)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:14:23 (running for 03:16:28.43)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 30%|████████████▉                              | 3/10 [00:31<01:12, 10.42s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 15:14:28 (running for 03:16:33.45)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:14:33 (running for 03:16:38.47)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 40%|█████████████████▏                         | 4/10 [00:41<01:02, 10.42s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 15:14:38 (running for 03:16:43.48)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:14:43 (running for 03:16:48.50)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 50%|█████████████████████▌                     | 5/10 [00:52<00:52, 10.41s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 15:14:48 (running for 03:16:53.52)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:14:53 (running for 03:16:58.54)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 60%|█████████████████████████▊                 | 6/10 [01:02<00:41, 10.43s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 15:14:58 (running for 03:17:03.56)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:15:03 (running for 03:17:08.58)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 70%|██████████████████████████████             | 7/10 [01:12<00:31, 10.43s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 15:15:08 (running for 03:17:13.60)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:15:13 (running for 03:17:18.62)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 80%|██████████████████████████████████▍        | 8/10 [01:23<00:20, 10.43s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 15:15:18 (running for 03:17:23.64)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:15:23 (running for 03:17:28.66)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 90%|██████████████████████████████████████▋    | 9/10 [01:33<00:10, 10.43s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 15:15:28 (running for 03:17:33.68)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:15:33 (running for 03:17:38.70)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:15:38 (running for 03:17:43.71)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-5

100%|██████████████████████████████████████████| 10/10 [01:44<00:00, 10.43s/it]
 90%|██████████████████████████████████████▋    | 9/10 [01:34<00:10, 10.47s/it]
100%|██████████████████████████████████████████| 10/10 [01:44<00:00, 10.45s/it]


== Status ==
Current time: 2025-09-29 15:15:43 (running for 03:17:48.74)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


[36m(RayTrainWorker pid=569510)[0m [KSpaceLoss] torch.Size([8, 2001191, 2, 1]) torch.Size([8, 2001191, 2, 1]) None
[36m(RayTrainWorker pid=569511)[0m Rank 0 - model initialization done -> loss fn initialization
[36m(RayTrainWorker pid=569510)[0m [ImageSpaceLoss] torch.Size([156, 156, 156, 2, 1]) torch.Size([156, 156, 156, 2, 1]) None
[36m(RayTrainWorker pid=569510)[0m Rank 1 - Loss k-space: 1.622 - Loss image space: 0.037 - Loss Wavelet 0.000 - Loss Hankel 0.000 - Loss Casorati 0.000
[36m(RayTrainWorker pid=569510)[0m Rank 1 - loss fn initialization done -> compute backward pass




== Status ==
Current time: 2025-09-29 15:15:48 (running for 03:17:53.76)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:15:53 (running for 03:17:58.78)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:15:58 (running for 03:18:03.80)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-5

[36m(RayTrainWorker pid=569510)[0m   return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


[36m(RayTrainWorker pid=569511)[0m Rank 0 - Index 0 - Gradient norm: 4.646
[36m(RayTrainWorker pid=569511)[0m Averaged gradient norm: 1.000
[36m(RayTrainWorker pid=569511)[0m Iteration: 33 - Elapsed time: 353 - Training loss: ['1.635'] - Validation loss: ['0.000']
== Status ==
Current time: 2025-09-29 15:19:34 (running for 03:21:39.63)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


[36m(RayTrainWorker pid=569511)[0m Rank 0 - reading data
[36m(RayTrainWorker pid=569511)[0m Rank 0 - compute backward pass done -> compute accumulator
[36m(RayTrainWorker pid=569510)[0m Rank 1 - Index 0 - Gradient norm: 4.626
== Status ==
Current time: 2025-09-29 15:19:39 (running for 03:21:44.65)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128

[36m(RayTrainWorker pid=569511)[0m   return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


== Status ==
Current time: 2025-09-29 15:19:44 (running for 03:21:49.68)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




  0%|                                                   | 0/10 [00:00<?, ?it/s]


== Status ==
Current time: 2025-09-29 15:19:49 (running for 03:21:54.70)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:19:54 (running for 03:21:59.72)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 10%|████▎                                      | 1/10 [00:10<01:33, 10.42s/it]
  0%|                                                   | 0/10 [00:00<?, ?it/s]


== Status ==
Current time: 2025-09-29 15:19:59 (running for 03:22:04.74)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:20:04 (running for 03:22:09.76)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 20%|████████▌                                  | 2/10 [00:20<01:23, 10.42s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 15:20:09 (running for 03:22:14.77)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:20:14 (running for 03:22:19.79)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 30%|████████████▉                              | 3/10 [00:31<01:12, 10.41s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 15:20:19 (running for 03:22:24.81)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:20:24 (running for 03:22:29.83)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 40%|█████████████████▏                         | 4/10 [00:41<01:02, 10.41s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 15:20:29 (running for 03:22:34.85)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:20:34 (running for 03:22:39.87)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:20:39 (running for 03:22:44.88)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-5

 50%|█████████████████████▌                     | 5/10 [00:52<00:52, 10.43s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 15:20:45 (running for 03:22:50.06)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:20:50 (running for 03:22:55.08)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 60%|█████████████████████████▊                 | 6/10 [01:02<00:41, 10.43s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 15:20:55 (running for 03:23:00.10)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:21:00 (running for 03:23:05.12)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 70%|██████████████████████████████             | 7/10 [01:12<00:31, 10.43s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 15:21:05 (running for 03:23:10.14)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:21:10 (running for 03:23:15.15)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 80%|██████████████████████████████████▍        | 8/10 [01:23<00:20, 10.43s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 15:21:15 (running for 03:23:20.18)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:21:20 (running for 03:23:25.19)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 90%|██████████████████████████████████████▋    | 9/10 [01:33<00:10, 10.44s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 15:21:25 (running for 03:23:30.21)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:21:30 (running for 03:23:35.23)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


[36m(RayTrainWorker pid=569510)[0m Rank 1 - model initialization done -> loss fn initialization
[36m(RayTrainWorker pid=569510)[0m Rank 1 - reading data
[36m(RayTrainWorker pid=569510)[0m Rank 1 - reading data done -> model initialization


100%|██████████████████████████████████████████| 10/10 [01:44<00:00, 10.43s/it]
 90%|██████████████████████████████████████▋    | 9/10 [01:34<00:10, 10.48s/it]
100%|██████████████████████████████████████████| 10/10 [01:44<00:00, 10.46s/it]


== Status ==
Current time: 2025-09-29 15:21:35 (running for 03:23:40.25)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


[36m(RayTrainWorker pid=569510)[0m [KSpaceLoss] torch.Size([8, 2001191, 2, 1]) torch.Size([8, 2001191, 2, 1]) None
[36m(RayTrainWorker pid=569511)[0m Rank 0 - model initialization done -> loss fn initialization
[36m(RayTrainWorker pid=569510)[0m [ImageSpaceLoss] torch.Size([156, 156, 156, 2, 1]) torch.Size([156, 156, 156, 2, 1]) None
[36m(RayTrainWorker pid=569510)[0m Rank 1 - Loss k-space: 1.635 - Loss image space: 0.035 - Loss Wavelet 0.000 - Loss Hankel 0.000 - Loss Casorati 0.000
[36m(RayTrainWorker pid=569510)[0m Rank 1 - loss fn initialization done -> compute backward pass




== Status ==
Current time: 2025-09-29 15:21:40 (running for 03:23:45.27)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:21:45 (running for 03:23:50.29)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:21:50 (running for 03:23:55.32)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-5

[36m(RayTrainWorker pid=569510)[0m   return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


== Status ==
Current time: 2025-09-29 15:25:26 (running for 03:27:31.33)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


[36m(RayTrainWorker pid=569510)[0m Rank 1 - Index 0 - Gradient norm: 4.638
[36m(RayTrainWorker pid=569511)[0m Averaged gradient norm: 1.000
[36m(RayTrainWorker pid=569511)[0m Creating untagged checkpoint ...
[36m(RayTrainWorker pid=569511)[0m 2025-09-29 15:25:27 Schedule checkpoint save with tag:  ...
[36m(RayTrainWorker pid=569511)[0m 2025-09-29 15:25:27 Saved checkpoint to buffer 0.0 seconds
[36m(RayTrainWorker pid=569511)[0m Iteration: 34 - Elapsed time: 356 - Training loss: ['1.607'] - Validation loss: ['0.000']
[36m(RayTrainWorker pid=569511)[0m 2025-09-29 15:25:27 Saved buffer to filesystem in 0.2 seconds
[36m(Ra

[36m(RayTrainWorker pid=569511)[0m   return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


== Status ==
Current time: 2025-09-29 15:25:41 (running for 03:27:46.40)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




  0%|                                                   | 0/10 [00:00<?, ?it/s]


== Status ==
Current time: 2025-09-29 15:25:46 (running for 03:27:51.42)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:25:51 (running for 03:27:56.44)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 10%|████▎                                      | 1/10 [00:10<01:33, 10.41s/it]
  0%|                                                   | 0/10 [00:00<?, ?it/s]


== Status ==
Current time: 2025-09-29 15:25:56 (running for 03:28:01.46)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:26:01 (running for 03:28:06.48)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 20%|████████▌                                  | 2/10 [00:20<01:23, 10.41s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 15:26:06 (running for 03:28:11.50)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:26:11 (running for 03:28:16.52)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 30%|████████████▉                              | 3/10 [00:31<01:12, 10.42s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 15:26:16 (running for 03:28:21.54)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:26:21 (running for 03:28:26.56)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 40%|█████████████████▏                         | 4/10 [00:41<01:02, 10.42s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 15:26:26 (running for 03:28:31.58)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:26:31 (running for 03:28:36.60)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:26:36 (running for 03:28:41.62)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-5

 50%|█████████████████████▌                     | 5/10 [00:52<00:52, 10.42s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 15:26:41 (running for 03:28:46.64)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:26:46 (running for 03:28:51.67)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 60%|█████████████████████████▊                 | 6/10 [01:02<00:41, 10.43s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 15:26:51 (running for 03:28:56.68)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:26:56 (running for 03:29:01.70)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 70%|██████████████████████████████             | 7/10 [01:12<00:31, 10.43s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 15:27:01 (running for 03:29:06.72)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:27:06 (running for 03:29:11.74)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 80%|██████████████████████████████████▍        | 8/10 [01:23<00:20, 10.44s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 15:27:11 (running for 03:29:16.76)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:27:16 (running for 03:29:21.79)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 90%|██████████████████████████████████████▋    | 9/10 [01:33<00:10, 10.44s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 15:27:21 (running for 03:29:26.80)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:27:26 (running for 03:29:31.83)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


[36m(RayTrainWorker pid=569510)[0m Rank 1 - model initialization done -> loss fn initialization
[36m(RayTrainWorker pid=569511)[0m Rank 0 - reading data
[36m(RayTrainWorker pid=569511)[0m Rank 0 - reading data done -> model initialization


100%|██████████████████████████████████████████| 10/10 [01:44<00:00, 10.43s/it]
 90%|██████████████████████████████████████▋    | 9/10 [01:34<00:10, 10.48s/it]
100%|██████████████████████████████████████████| 10/10 [01:44<00:00, 10.45s/it]


== Status ==
Current time: 2025-09-29 15:27:31 (running for 03:29:36.84)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


[36m(RayTrainWorker pid=569510)[0m [KSpaceLoss] torch.Size([8, 2001191, 2, 1]) torch.Size([8, 2001191, 2, 1]) None
[36m(RayTrainWorker pid=569511)[0m Rank 0 - model initialization done -> loss fn initialization
[36m(RayTrainWorker pid=569510)[0m [ImageSpaceLoss] torch.Size([156, 156, 156, 2, 1]) torch.Size([156, 156, 156, 2, 1]) None
[36m(RayTrainWorker pid=569510)[0m Rank 1 - Loss k-space: 1.691 - Loss image space: 0.036 - Loss Wavelet 0.000 - Loss Hankel 0.000 - Loss Casorati 0.000
[36m(RayTrainWorker pid=569510)[0m Rank 1 - loss fn initialization done -> compute backward pass




== Status ==
Current time: 2025-09-29 15:27:36 (running for 03:29:41.87)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:27:41 (running for 03:29:46.89)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:27:46 (running for 03:29:51.91)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-5

[36m(RayTrainWorker pid=569510)[0m   return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


[36m(RayTrainWorker pid=569510)[0m Rank 1 - Index 0 - Gradient norm: 4.592
[36m(RayTrainWorker pid=569511)[0m Averaged gradient norm: 1.000
[36m(RayTrainWorker pid=569511)[0m Iteration: 35 - Elapsed time: 354 - Training loss: ['1.656'] - Validation loss: ['0.000']
== Status ==
Current time: 2025-09-29 15:31:22 (running for 03:33:27.79)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:31:27 (running for 03:33:32.82)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


[36m(RayTr

[36m(RayTrainWorker pid=569511)[0m   return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


== Status ==
Current time: 2025-09-29 15:31:32 (running for 03:33:37.84)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:31:37 (running for 03:33:42.89)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




  0%|                                                   | 0/10 [00:00<?, ?it/s]


== Status ==
Current time: 2025-09-29 15:31:42 (running for 03:33:47.91)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:31:47 (running for 03:33:52.93)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 10%|████▎                                      | 1/10 [00:10<01:33, 10.42s/it]
  0%|                                                   | 0/10 [00:00<?, ?it/s]


== Status ==
Current time: 2025-09-29 15:31:52 (running for 03:33:57.95)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:31:58 (running for 03:34:02.97)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 20%|████████▌                                  | 2/10 [00:20<01:23, 10.42s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 15:32:03 (running for 03:34:07.98)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:32:08 (running for 03:34:13.01)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 30%|████████████▉                              | 3/10 [00:31<01:12, 10.42s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 15:32:13 (running for 03:34:18.02)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:32:18 (running for 03:34:23.04)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 40%|█████████████████▏                         | 4/10 [00:41<01:02, 10.42s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 15:32:23 (running for 03:34:28.06)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:32:28 (running for 03:34:33.08)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 50%|█████████████████████▌                     | 5/10 [00:52<00:52, 10.42s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 15:32:33 (running for 03:34:38.10)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:32:38 (running for 03:34:43.12)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 60%|█████████████████████████▊                 | 6/10 [01:02<00:41, 10.43s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 15:32:43 (running for 03:34:48.13)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:32:48 (running for 03:34:53.15)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 70%|██████████████████████████████             | 7/10 [01:13<00:31, 10.44s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 15:32:53 (running for 03:34:58.17)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:32:58 (running for 03:35:03.19)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 80%|██████████████████████████████████▍        | 8/10 [01:23<00:20, 10.44s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 15:33:03 (running for 03:35:08.21)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:33:08 (running for 03:35:13.23)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 90%|██████████████████████████████████████▋    | 9/10 [01:33<00:10, 10.44s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 15:33:13 (running for 03:35:18.25)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:33:18 (running for 03:35:23.27)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:33:23 (running for 03:35:28.28)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-5

100%|██████████████████████████████████████████| 10/10 [01:44<00:00, 10.43s/it]
 90%|██████████████████████████████████████▋    | 9/10 [01:34<00:10, 10.48s/it]
100%|██████████████████████████████████████████| 10/10 [01:44<00:00, 10.45s/it]


== Status ==
Current time: 2025-09-29 15:33:28 (running for 03:35:33.31)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


[36m(RayTrainWorker pid=569510)[0m [KSpaceLoss] torch.Size([8, 2001191, 2, 1]) torch.Size([8, 2001191, 2, 1]) None
[36m(RayTrainWorker pid=569511)[0m Rank 0 - model initialization done -> loss fn initialization
[36m(RayTrainWorker pid=569510)[0m [ImageSpaceLoss] torch.Size([156, 156, 156, 2, 1]) torch.Size([156, 156, 156, 2, 1]) None
[36m(RayTrainWorker pid=569510)[0m Rank 1 - Loss k-space: 1.685 - Loss image space: 0.034 - Loss Wavelet 0.000 - Loss Hankel 0.000 - Loss Casorati 0.000
[36m(RayTrainWorker pid=569510)[0m Rank 1 - loss fn initialization done -> compute backward pass




== Status ==
Current time: 2025-09-29 15:33:33 (running for 03:35:38.33)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:33:38 (running for 03:35:43.35)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:33:43 (running for 03:35:48.37)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-5

[36m(RayTrainWorker pid=569510)[0m   return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


[36m(RayTrainWorker pid=569510)[0m Rank 1 - Index 0 - Gradient norm: 4.467
[36m(RayTrainWorker pid=569511)[0m Averaged gradient norm: 1.000
[36m(RayTrainWorker pid=569511)[0m Iteration: 36 - Elapsed time: 353 - Training loss: ['1.622'] - Validation loss: ['0.000']
== Status ==
Current time: 2025-09-29 15:37:19 (running for 03:39:24.21)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:37:24 (running for 03:39:29.23)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


[36m(RayTr

[36m(RayTrainWorker pid=569511)[0m   return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


== Status ==
Current time: 2025-09-29 15:37:29 (running for 03:39:34.26)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




  0%|                                                   | 0/10 [00:00<?, ?it/s]


== Status ==
Current time: 2025-09-29 15:37:34 (running for 03:39:39.28)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:37:39 (running for 03:39:44.30)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 10%|████▎                                      | 1/10 [00:10<01:33, 10.39s/it]
  0%|                                                   | 0/10 [00:00<?, ?it/s]


== Status ==
Current time: 2025-09-29 15:37:44 (running for 03:39:49.31)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:37:49 (running for 03:39:54.33)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 20%|████████▌                                  | 2/10 [00:20<01:23, 10.41s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 15:37:54 (running for 03:39:59.35)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:37:59 (running for 03:40:04.37)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 30%|████████████▉                              | 3/10 [00:31<01:12, 10.43s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 15:38:04 (running for 03:40:09.39)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:38:09 (running for 03:40:14.41)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 40%|█████████████████▏                         | 4/10 [00:41<01:02, 10.43s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 15:38:14 (running for 03:40:19.43)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:38:19 (running for 03:40:24.45)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 50%|█████████████████████▌                     | 5/10 [00:52<00:52, 10.43s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 15:38:24 (running for 03:40:29.46)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:38:29 (running for 03:40:34.48)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 60%|█████████████████████████▊                 | 6/10 [01:02<00:41, 10.43s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 15:38:34 (running for 03:40:39.50)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:38:39 (running for 03:40:44.52)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:38:44 (running for 03:40:49.54)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-5

 70%|██████████████████████████████             | 7/10 [01:12<00:31, 10.43s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 15:38:49 (running for 03:40:54.56)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:38:54 (running for 03:40:59.58)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 80%|██████████████████████████████████▍        | 8/10 [01:23<00:20, 10.43s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 15:38:59 (running for 03:41:04.60)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:39:04 (running for 03:41:09.62)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 90%|██████████████████████████████████████▋    | 9/10 [01:33<00:10, 10.43s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 15:39:09 (running for 03:41:14.64)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:39:14 (running for 03:41:19.66)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


[36m(RayTrainWorker pid=569510)[0m Rank 1 - model initialization done -> loss fn initialization
[36m(RayTrainWorker pid=569510)[0m Rank 1 - reading data
[36m(RayTrainWorker pid=569511)[0m Rank 0 - reading data done -> model initialization


100%|██████████████████████████████████████████| 10/10 [01:44<00:00, 10.43s/it]
 90%|██████████████████████████████████████▋    | 9/10 [01:34<00:10, 10.48s/it]
100%|██████████████████████████████████████████| 10/10 [01:44<00:00, 10.46s/it]


== Status ==
Current time: 2025-09-29 15:39:19 (running for 03:41:24.68)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


[36m(RayTrainWorker pid=569511)[0m [KSpaceLoss] torch.Size([8, 2001191, 2, 1]) torch.Size([8, 2001191, 2, 1]) None
[36m(RayTrainWorker pid=569511)[0m Rank 0 - model initialization done -> loss fn initialization
[36m(RayTrainWorker pid=569511)[0m [ImageSpaceLoss] torch.Size([156, 156, 156, 2, 1]) torch.Size([156, 156, 156, 2, 1]) None
[36m(RayTrainWorker pid=569510)[0m Rank 1 - Loss k-space: 1.660 - Loss image space: 0.036 - Loss Wavelet 0.000 - Loss Hankel 0.000 - Loss Casorati 0.000
[36m(RayTrainWorker pid=569510)[0m Rank 1 - loss fn initialization done -> compute backward pass




== Status ==
Current time: 2025-09-29 15:39:24 (running for 03:41:29.70)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:39:29 (running for 03:41:34.73)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:39:34 (running for 03:41:39.75)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-5

[36m(RayTrainWorker pid=569510)[0m   return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


== Status ==
Current time: 2025-09-29 15:43:15 (running for 03:45:20.62)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


[36m(RayTrainWorker pid=569511)[0m Rank 0 - reading data
[36m(RayTrainWorker pid=569511)[0m Rank 0 - compute backward pass done -> compute accumulator
[36m(RayTrainWorker pid=569511)[0m Rank 0 - Index 0 - Gradient norm: 4.419
== Status ==
Current time: 2025-09-29 15:43:20 (running for 03:45:25.65)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


[36m(RayTrainWorker pid=569510)[0m Rank 1 - read

[36m(RayTrainWorker pid=569511)[0m   return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


== Status ==
Current time: 2025-09-29 15:43:25 (running for 03:45:30.67)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




  0%|                                                   | 0/10 [00:00<?, ?it/s]


== Status ==
Current time: 2025-09-29 15:43:30 (running for 03:45:35.69)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:43:35 (running for 03:45:40.71)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 10%|████▎                                      | 1/10 [00:10<01:33, 10.42s/it]
  0%|                                                   | 0/10 [00:00<?, ?it/s]


== Status ==
Current time: 2025-09-29 15:43:40 (running for 03:45:45.73)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:43:45 (running for 03:45:50.75)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 20%|████████▌                                  | 2/10 [00:20<01:23, 10.42s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 15:43:50 (running for 03:45:55.77)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:43:55 (running for 03:46:00.79)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 30%|████████████▉                              | 3/10 [00:31<01:12, 10.41s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 15:44:00 (running for 03:46:05.81)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:44:05 (running for 03:46:10.83)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 40%|█████████████████▏                         | 4/10 [00:41<01:02, 10.41s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 15:44:10 (running for 03:46:15.85)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:44:15 (running for 03:46:20.86)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:44:20 (running for 03:46:25.88)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-5

 50%|█████████████████████▌                     | 5/10 [00:52<00:52, 10.41s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 15:44:25 (running for 03:46:30.90)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:44:30 (running for 03:46:35.92)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 60%|█████████████████████████▊                 | 6/10 [01:02<00:41, 10.42s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 15:44:35 (running for 03:46:40.94)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:44:40 (running for 03:46:45.96)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 70%|██████████████████████████████             | 7/10 [01:12<00:31, 10.43s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 15:44:46 (running for 03:46:50.97)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:44:51 (running for 03:46:55.99)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 80%|██████████████████████████████████▍        | 8/10 [01:23<00:20, 10.43s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 15:44:56 (running for 03:47:01.01)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:45:01 (running for 03:47:06.03)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 90%|██████████████████████████████████████▋    | 9/10 [01:33<00:10, 10.44s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 15:45:06 (running for 03:47:11.05)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:45:11 (running for 03:47:16.07)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


[36m(RayTrainWorker pid=569510)[0m Rank 1 - model initialization done -> loss fn initialization
[36m(RayTrainWorker pid=569510)[0m Rank 1 - reading data
[36m(RayTrainWorker pid=569511)[0m Rank 0 - reading data done -> model initialization


100%|██████████████████████████████████████████| 10/10 [01:44<00:00, 10.43s/it]
 90%|██████████████████████████████████████▋    | 9/10 [01:33<00:10, 10.47s/it]
100%|██████████████████████████████████████████| 10/10 [01:44<00:00, 10.45s/it]


== Status ==
Current time: 2025-09-29 15:45:16 (running for 03:47:21.09)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


[36m(RayTrainWorker pid=569510)[0m [KSpaceLoss] torch.Size([8, 2001191, 2, 1]) torch.Size([8, 2001191, 2, 1]) None
[36m(RayTrainWorker pid=569511)[0m Rank 0 - model initialization done -> loss fn initialization
[36m(RayTrainWorker pid=569510)[0m [ImageSpaceLoss] torch.Size([156, 156, 156, 2, 1]) torch.Size([156, 156, 156, 2, 1]) None
[36m(RayTrainWorker pid=569510)[0m Rank 1 - Loss k-space: 1.698 - Loss image space: 0.035 - Loss Wavelet 0.000 - Loss Hankel 0.000 - Loss Casorati 0.000
[36m(RayTrainWorker pid=569510)[0m Rank 1 - loss fn initialization done -> compute backward pass
== Status ==
Current time: 2025-09-29 15:45:2



== Status ==
Current time: 2025-09-29 15:45:26 (running for 03:47:31.14)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:45:31 (running for 03:47:36.16)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:45:36 (running for 03:47:41.18)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-5

[36m(RayTrainWorker pid=569510)[0m   return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


[36m(RayTrainWorker pid=569510)[0m Rank 1 - Index 0 - Gradient norm: 4.421
[36m(RayTrainWorker pid=569511)[0m Averaged gradient norm: 1.000
[36m(RayTrainWorker pid=569511)[0m Iteration: 38 - Elapsed time: 354 - Training loss: ['1.659'] - Validation loss: ['0.000']
== Status ==
Current time: 2025-09-29 15:49:07 (running for 03:51:12.01)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:49:12 (running for 03:51:17.03)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


[36m(RayTr

[36m(RayTrainWorker pid=569511)[0m   return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


== Status ==
Current time: 2025-09-29 15:49:17 (running for 03:51:22.05)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




  0%|                                                   | 0/10 [00:00<?, ?it/s]


== Status ==
Current time: 2025-09-29 15:49:22 (running for 03:51:27.08)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:49:27 (running for 03:51:32.09)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:49:32 (running for 03:51:37.12)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-5

 10%|████▎                                      | 1/10 [00:10<01:33, 10.40s/it]
  0%|                                                   | 0/10 [00:00<?, ?it/s]


== Status ==
Current time: 2025-09-29 15:49:37 (running for 03:51:42.13)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:49:42 (running for 03:51:47.15)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 20%|████████▌                                  | 2/10 [00:20<01:23, 10.41s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 15:49:47 (running for 03:51:52.17)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:49:52 (running for 03:51:57.19)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 30%|████████████▉                              | 3/10 [00:31<01:12, 10.41s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 15:49:57 (running for 03:52:02.21)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:50:02 (running for 03:52:07.23)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 40%|█████████████████▏                         | 4/10 [00:41<01:02, 10.41s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 15:50:07 (running for 03:52:12.25)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:50:12 (running for 03:52:17.27)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 50%|█████████████████████▌                     | 5/10 [00:52<00:52, 10.41s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 15:50:17 (running for 03:52:22.28)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:50:22 (running for 03:52:27.30)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 60%|█████████████████████████▊                 | 6/10 [01:02<00:41, 10.43s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 15:50:27 (running for 03:52:32.32)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:50:32 (running for 03:52:37.34)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 70%|██████████████████████████████             | 7/10 [01:13<00:31, 10.45s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 15:50:37 (running for 03:52:42.36)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:50:42 (running for 03:52:47.38)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 80%|██████████████████████████████████▍        | 8/10 [01:23<00:20, 10.44s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 15:50:47 (running for 03:52:52.40)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:50:52 (running for 03:52:57.42)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 90%|██████████████████████████████████████▋    | 9/10 [01:33<00:10, 10.44s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 15:50:57 (running for 03:53:02.44)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:51:02 (running for 03:53:07.46)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


[36m(RayTrainWorker pid=569510)[0m Rank 1 - model initialization done -> loss fn initialization
[36m(RayTrainWorker pid=569511)[0m Rank 0 - reading data
[36m(RayTrainWorker pid=569511)[0m Rank 0 - reading data done -> model initialization


100%|██████████████████████████████████████████| 10/10 [01:44<00:00, 10.44s/it]
 90%|██████████████████████████████████████▋    | 9/10 [01:33<00:10, 10.47s/it]
100%|██████████████████████████████████████████| 10/10 [01:44<00:00, 10.45s/it]


== Status ==
Current time: 2025-09-29 15:51:07 (running for 03:53:12.47)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:51:12 (running for 03:53:17.50)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


[36m(RayTrainWorker pid=569510)[0m [KSpaceLoss] torch.Size([8, 2001191, 2, 1]) torch.Size([8, 2001191, 2, 1]) None
[36m(RayTrainWorker pid=569511)[0m Rank 0 - model initialization done -> loss fn initialization
[36m(RayTrainWorker pid=569510)[0m [ImageSpaceLoss] torch.Size([1



== Status ==
Current time: 2025-09-29 15:51:17 (running for 03:53:22.51)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:51:22 (running for 03:53:27.54)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:51:27 (running for 03:53:32.56)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-5

[36m(RayTrainWorker pid=569510)[0m   return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


== Status ==
Current time: 2025-09-29 15:54:58 (running for 03:57:03.39)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


[36m(RayTrainWorker pid=569510)[0m Rank 1 - Index 0 - Gradient norm: 4.449
[36m(RayTrainWorker pid=569511)[0m Averaged gradient norm: 1.000
[36m(RayTrainWorker pid=569511)[0m Creating untagged checkpoint ...
[36m(RayTrainWorker pid=569511)[0m 2025-09-29 15:54:59 Schedule checkpoint save with tag:  ...
[36m(RayTrainWorker pid=569511)[0m 2025-09-29 15:54:59 Saved checkpoint to buffer 0.0 seconds
[36m(RayTrainWorker pid=569511)[0m Iteration: 39 - Elapsed time: 353 - Training loss: ['1.612'] - Validation loss: ['0.000']
[36m(RayTrainWorker pid=569511)[0m 2025-09-29 15:54:59 Saved buffer to filesystem in 0.1 seconds
[36m(Ra

[36m(RayTrainWorker pid=569511)[0m   return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


== Status ==
Current time: 2025-09-29 15:55:13 (running for 03:57:18.46)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




  0%|                                                   | 0/10 [00:00<?, ?it/s]


== Status ==
Current time: 2025-09-29 15:55:18 (running for 03:57:23.48)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:55:23 (running for 03:57:28.50)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 10%|████▎                                      | 1/10 [00:10<01:33, 10.43s/it]
  0%|                                                   | 0/10 [00:00<?, ?it/s]


== Status ==
Current time: 2025-09-29 15:55:28 (running for 03:57:33.51)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:55:33 (running for 03:57:38.53)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 20%|████████▌                                  | 2/10 [00:20<01:23, 10.43s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 15:55:38 (running for 03:57:43.55)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:55:43 (running for 03:57:48.57)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 30%|████████████▉                              | 3/10 [00:31<01:12, 10.41s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 15:55:48 (running for 03:57:53.59)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:55:53 (running for 03:57:58.61)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 40%|█████████████████▏                         | 4/10 [00:41<01:02, 10.41s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 15:55:58 (running for 03:58:03.62)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:56:03 (running for 03:58:08.64)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 50%|█████████████████████▌                     | 5/10 [00:52<00:52, 10.42s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 15:56:08 (running for 03:58:13.66)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:56:13 (running for 03:58:18.68)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 60%|█████████████████████████▊                 | 6/10 [01:02<00:41, 10.43s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 15:56:18 (running for 03:58:23.70)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:56:23 (running for 03:58:28.72)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 70%|██████████████████████████████             | 7/10 [01:13<00:31, 10.44s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 15:56:28 (running for 03:58:33.74)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:56:33 (running for 03:58:38.76)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:56:38 (running for 03:58:43.77)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-5

 80%|██████████████████████████████████▍        | 8/10 [01:23<00:20, 10.44s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 15:56:43 (running for 03:58:48.79)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:56:48 (running for 03:58:53.81)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 90%|██████████████████████████████████████▋    | 9/10 [01:33<00:10, 10.44s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 15:56:53 (running for 03:58:58.83)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:56:58 (running for 03:59:03.85)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


[36m(RayTrainWorker pid=569510)[0m Rank 1 - model initialization done -> loss fn initialization
[36m(RayTrainWorker pid=569510)[0m Rank 1 - reading data
[36m(RayTrainWorker pid=569511)[0m Rank 0 - reading data done -> model initialization


100%|██████████████████████████████████████████| 10/10 [01:44<00:00, 10.44s/it]
 90%|██████████████████████████████████████▋    | 9/10 [01:34<00:10, 10.48s/it]
100%|██████████████████████████████████████████| 10/10 [01:44<00:00, 10.45s/it]


== Status ==
Current time: 2025-09-29 15:57:03 (running for 03:59:08.88)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


[36m(RayTrainWorker pid=569510)[0m [KSpaceLoss] torch.Size([8, 2001191, 2, 1]) torch.Size([8, 2001191, 2, 1]) None
[36m(RayTrainWorker pid=569511)[0m Rank 0 - model initialization done -> loss fn initialization
[36m(RayTrainWorker pid=569510)[0m [ImageSpaceLoss] torch.Size([156, 156, 156, 2, 1]) torch.Size([156, 156, 156, 2, 1]) None
[36m(RayTrainWorker pid=569510)[0m Rank 1 - Loss k-space: 1.624 - Loss image space: 0.037 - Loss Wavelet 0.000 - Loss Hankel 0.000 - Loss Casorati 0.000
[36m(RayTrainWorker pid=569510)[0m Rank 1 - loss fn initialization done -> compute backward pass




== Status ==
Current time: 2025-09-29 15:57:08 (running for 03:59:13.90)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:57:13 (running for 03:59:18.92)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 15:57:18 (running for 03:59:23.94)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-5

[36m(RayTrainWorker pid=569510)[0m   return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


[36m(RayTrainWorker pid=569510)[0m Rank 1 - Index 0 - Gradient norm: 4.536
[36m(RayTrainWorker pid=569511)[0m Averaged gradient norm: 1.000
[36m(RayTrainWorker pid=569511)[0m Iteration: 40 - Elapsed time: 353 - Training loss: ['1.642'] - Validation loss: ['0.000']
== Status ==
Current time: 2025-09-29 16:00:54 (running for 04:02:59.79)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 16:00:59 (running for 04:03:04.81)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


[36m(RayTr

[36m(RayTrainWorker pid=569511)[0m   return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


== Status ==
Current time: 2025-09-29 16:01:04 (running for 04:03:09.84)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




  0%|                                                   | 0/10 [00:00<?, ?it/s]


== Status ==
Current time: 2025-09-29 16:01:09 (running for 04:03:14.85)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 16:01:14 (running for 04:03:19.87)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 10%|████▎                                      | 1/10 [00:10<01:33, 10.40s/it]
  0%|                                                   | 0/10 [00:00<?, ?it/s]


== Status ==
Current time: 2025-09-29 16:01:19 (running for 04:03:24.89)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 16:01:25 (running for 04:03:30.07)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 20%|████████▌                                  | 2/10 [00:20<01:23, 10.41s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 16:01:30 (running for 04:03:35.09)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 16:01:35 (running for 04:03:40.11)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 16:01:40 (running for 04:03:45.13)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-5

 30%|████████████▉                              | 3/10 [00:31<01:13, 10.43s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 16:01:45 (running for 04:03:50.15)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 16:01:50 (running for 04:03:55.17)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 40%|█████████████████▏                         | 4/10 [00:41<01:02, 10.43s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 16:01:55 (running for 04:04:00.19)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 16:02:00 (running for 04:04:05.21)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 50%|█████████████████████▌                     | 5/10 [00:52<00:52, 10.44s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 16:02:05 (running for 04:04:10.23)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 16:02:10 (running for 04:04:15.24)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 60%|█████████████████████████▊                 | 6/10 [01:02<00:41, 10.44s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 16:02:15 (running for 04:04:20.27)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 16:02:20 (running for 04:04:25.28)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 70%|██████████████████████████████             | 7/10 [01:13<00:31, 10.43s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 16:02:25 (running for 04:04:30.30)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 16:02:30 (running for 04:04:35.32)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 80%|██████████████████████████████████▍        | 8/10 [01:23<00:20, 10.43s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 16:02:35 (running for 04:04:40.34)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 16:02:40 (running for 04:04:45.35)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




 90%|██████████████████████████████████████▋    | 9/10 [01:33<00:10, 10.43s/it][32m [repeated 2x across cluster][0m


== Status ==
Current time: 2025-09-29 16:02:45 (running for 04:04:50.37)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 16:02:50 (running for 04:04:55.39)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


[36m(RayTrainWorker pid=569510)[0m Rank 1 - model initialization done -> loss fn initialization
[36m(RayTrainWorker pid=569510)[0m Rank 1 - reading data
[36m(RayTrainWorker pid=569511)[0m Rank 0 - reading data done -> model initialization


100%|██████████████████████████████████████████| 10/10 [01:44<00:00, 10.44s/it]
 90%|██████████████████████████████████████▋    | 9/10 [01:34<00:10, 10.48s/it]
100%|██████████████████████████████████████████| 10/10 [01:44<00:00, 10.46s/it]


== Status ==
Current time: 2025-09-29 16:02:55 (running for 04:05:00.41)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


[36m(RayTrainWorker pid=569510)[0m [KSpaceLoss] torch.Size([8, 2001191, 2, 1]) torch.Size([8, 2001191, 2, 1]) None
[36m(RayTrainWorker pid=569511)[0m Rank 0 - model initialization done -> loss fn initialization
== Status ==
Current time: 2025-09-29 16:03:00 (running for 04:05:05.43)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


[36m(RayTrainWorker pid=569510)[0m [ImageSpaceLoss] torch.Size([1



== Status ==
Current time: 2025-09-29 16:03:05 (running for 04:05:10.46)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 16:03:10 (running for 04:05:15.48)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-55/torch_trainer_example/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-09-29 16:03:15 (running for 04:05:20.50)
Using FIFO scheduling algorithm.
Logical resource usage: 49.0/128 CPUs, 2.0/2 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2025-09-29_11-57-43_511034_556628/artifacts/2025-09-29_11-57-5