## Configuring Training Datasets

### 1. Getting Started

#### Bulk Ingest

In [10]:
import ray
from ray.air import session
from ray.data import Dataset
from ray.data.preprocessors import BatchMapper
from ray.train.torch import TorchTrainer
from ray.air.config import ScalingConfig

In [12]:
# Preprocessor that multiplies every value in a batch by 2.
def _scale_by_two(df):
    """Return the given batch with all of its values doubled."""
    return df * 2

preprocessor = BatchMapper(_scale_by_two)

def train_loop_per_worker():
    """Training loop run on each worker for the bulk-ingest example.

    Looks up the worker's 'train' dataset shard through the AIR session,
    makes ten complete passes over the shard's batches (printing every
    batch), and finally prints the shard's stats for performance debugging.
    """
    shard: Dataset = session.get_dataset_shard('train')

    # Ten manual passes over the same shard.
    num_passes = 10
    completed = 0
    while completed < num_passes:
        for batch in shard.iter_batches():
            print("training on batch", batch)
        completed += 1

    # Emit the stats once, after all passes are done.
    print(shard.stats())

# Bulk-ingest trainer: four workers, the tensor dataset registered under
# the 'train' key, and the preprocessor defined above.
trainer = TorchTrainer(
    train_loop_per_worker=train_loop_per_worker,
    datasets=dict(train=ray.data.range_tensor(1000)),
    preprocessor=preprocessor,
    scaling_config=ScalingConfig(num_workers=4),
)

# Kept as the last expression so the cell displays the returned Result.
trainer.fit()

Trial name,status,loc
TorchTrainer_b58dd_00000,TERMINATED,127.0.0.1:56227


[2m[36m(RayTrainWorker pid=56236)[0m 2022-10-19 22:19:56,720	INFO config.py:71 -- Setting up process group for: env:// [rank=0, world_size=4]
2022-10-19 22:19:58,368	ERROR checkpoint_manager.py:320 -- Result dict has no key: training_iteration. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['trial_id', 'experiment_id', 'date', 'timestamp', 'pid', 'hostname', 'node_ip', 'config', 'done']


[2m[36m(RayTrainWorker pid=56236)[0m training on batch [[ 372]
[2m[36m(RayTrainWorker pid=56236)[0m  [ 374]
[2m[36m(RayTrainWorker pid=56236)[0m  [ 376]
[2m[36m(RayTrainWorker pid=56236)[0m  [ 378]
[2m[36m(RayTrainWorker pid=56236)[0m  [ 380]
[2m[36m(RayTrainWorker pid=56236)[0m  [ 382]
[2m[36m(RayTrainWorker pid=56236)[0m  [ 384]
[2m[36m(RayTrainWorker pid=56236)[0m  [ 386]
[2m[36m(RayTrainWorker pid=56236)[0m  [ 388]
[2m[36m(RayTrainWorker pid=56236)[0m  [ 390]
[2m[36m(RayTrainWorker pid=56236)[0m  [ 392]
[2m[36m(RayTrainWorker pid=56236)[0m  [ 394]
[2m[36m(RayTrainWorker pid=56236)[0m  [ 396]
[2m[36m(RayTrainWorker pid=56236)[0m  [ 398]
[2m[36m(RayTrainWorker pid=56236)[0m  [ 400]
[2m[36m(RayTrainWorker pid=56236)[0m  [ 402]
[2m[36m(RayTrainWorker pid=56236)[0m  [ 404]
[2m[36m(RayTrainWorker pid=56236)[0m  [ 406]
[2m[36m(RayTrainWorker pid=56236)[0m  [ 408]
[2m[36m(RayTrainWorker pid=56236)[0m  [ 410]
[2m[36m(RayTrainWork

2022-10-19 22:19:58,495	INFO tune.py:758 -- Total run time: 8.79 seconds (8.65 seconds for the tuning loop).


[2m[36m(RayTrainWorker pid=56237)[0m  [ 802]
[2m[36m(RayTrainWorker pid=56237)[0m  [ 804]
[2m[36m(RayTrainWorker pid=56237)[0m  [ 806]
[2m[36m(RayTrainWorker pid=56237)[0m  [ 808]
[2m[36m(RayTrainWorker pid=56237)[0m  [ 810]
[2m[36m(RayTrainWorker pid=56237)[0m  [ 812]
[2m[36m(RayTrainWorker pid=56237)[0m  [ 814]
[2m[36m(RayTrainWorker pid=56237)[0m  [ 816]
[2m[36m(RayTrainWorker pid=56237)[0m  [ 818]
[2m[36m(RayTrainWorker pid=56237)[0m  [ 820]
[2m[36m(RayTrainWorker pid=56237)[0m  [ 822]
[2m[36m(RayTrainWorker pid=56237)[0m  [ 824]
[2m[36m(RayTrainWorker pid=56237)[0m  [ 826]
[2m[36m(RayTrainWorker pid=56237)[0m  [ 828]
[2m[36m(RayTrainWorker pid=56237)[0m  [ 830]
[2m[36m(RayTrainWorker pid=56237)[0m  [ 832]
[2m[36m(RayTrainWorker pid=56237)[0m  [ 834]
[2m[36m(RayTrainWorker pid=56237)[0m  [ 836]
[2m[36m(RayTrainWorker pid=56237)[0m  [ 838]
[2m[36m(RayTrainWorker pid=56237)[0m  [ 840]
[2m[36m(RayTrainWorker pid=56237)[0m 

Result(metrics={'trial_id': 'b58dd_00000', 'done': True}, error=None, log_dir=PosixPath('/Users/yjkim/ray_results/TorchTrainer_2022-10-19_22-19-49/TorchTrainer_b58dd_00000_0_2022-10-19_22-19-49'))

[2m[36m(RayTrainWorker pid=56237)[0m  [1194]
[2m[36m(RayTrainWorker pid=56237)[0m  [1196]
[2m[36m(RayTrainWorker pid=56237)[0m  [1198]
[2m[36m(RayTrainWorker pid=56237)[0m  [1200]
[2m[36m(RayTrainWorker pid=56237)[0m  [1202]
[2m[36m(RayTrainWorker pid=56237)[0m  [1204]
[2m[36m(RayTrainWorker pid=56237)[0m  [1206]
[2m[36m(RayTrainWorker pid=56237)[0m  [1208]
[2m[36m(RayTrainWorker pid=56237)[0m  [1210]
[2m[36m(RayTrainWorker pid=56237)[0m  [1212]
[2m[36m(RayTrainWorker pid=56237)[0m  [1214]
[2m[36m(RayTrainWorker pid=56237)[0m  [1216]
[2m[36m(RayTrainWorker pid=56237)[0m  [1218]
[2m[36m(RayTrainWorker pid=56237)[0m  [1220]
[2m[36m(RayTrainWorker pid=56237)[0m  [1222]
[2m[36m(RayTrainWorker pid=56237)[0m  [1224]
[2m[36m(RayTrainWorker pid=56237)[0m  [1226]
[2m[36m(RayTrainWorker pid=56237)[0m  [1228]
[2m[36m(RayTrainWorker pid=56237)[0m  [1230]
[2m[36m(RayTrainWorker pid=56237)[0m  [1232]
[2m[36m(RayTrainWorker pid=56237)[0m 

[2m[36m(RayTrainWorker pid=56237)[0m  [ 800]
[2m[36m(RayTrainWorker pid=56237)[0m  [ 802]
[2m[36m(RayTrainWorker pid=56237)[0m  [ 804]
[2m[36m(RayTrainWorker pid=56237)[0m  [ 806]
[2m[36m(RayTrainWorker pid=56237)[0m  [ 808]
[2m[36m(RayTrainWorker pid=56237)[0m  [ 810]
[2m[36m(RayTrainWorker pid=56237)[0m  [ 812]
[2m[36m(RayTrainWorker pid=56237)[0m  [ 814]
[2m[36m(RayTrainWorker pid=56237)[0m  [ 816]
[2m[36m(RayTrainWorker pid=56237)[0m  [ 818]
[2m[36m(RayTrainWorker pid=56237)[0m  [ 820]
[2m[36m(RayTrainWorker pid=56237)[0m  [ 822]
[2m[36m(RayTrainWorker pid=56237)[0m  [ 824]
[2m[36m(RayTrainWorker pid=56237)[0m  [ 826]
[2m[36m(RayTrainWorker pid=56237)[0m  [ 828]
[2m[36m(RayTrainWorker pid=56237)[0m  [ 830]
[2m[36m(RayTrainWorker pid=56237)[0m  [ 832]
[2m[36m(RayTrainWorker pid=56237)[0m  [ 834]
[2m[36m(RayTrainWorker pid=56237)[0m  [ 836]
[2m[36m(RayTrainWorker pid=56237)[0m  [ 838]
[2m[36m(RayTrainWorker pid=56237)[0m 

[2m[36m(RayTrainWorker pid=56239)[0m  [ 546]
[2m[36m(RayTrainWorker pid=56239)[0m  [ 548]
[2m[36m(RayTrainWorker pid=56239)[0m  [ 550]
[2m[36m(RayTrainWorker pid=56239)[0m  [ 552]
[2m[36m(RayTrainWorker pid=56239)[0m  [ 554]
[2m[36m(RayTrainWorker pid=56239)[0m  [ 556]
[2m[36m(RayTrainWorker pid=56239)[0m  [ 558]
[2m[36m(RayTrainWorker pid=56239)[0m  [ 560]
[2m[36m(RayTrainWorker pid=56239)[0m  [ 562]
[2m[36m(RayTrainWorker pid=56239)[0m  [ 564]
[2m[36m(RayTrainWorker pid=56239)[0m  [ 566]
[2m[36m(RayTrainWorker pid=56239)[0m  [ 568]
[2m[36m(RayTrainWorker pid=56239)[0m  [ 570]
[2m[36m(RayTrainWorker pid=56239)[0m  [ 572]
[2m[36m(RayTrainWorker pid=56239)[0m  [ 574]
[2m[36m(RayTrainWorker pid=56239)[0m  [ 576]
[2m[36m(RayTrainWorker pid=56239)[0m  [ 578]
[2m[36m(RayTrainWorker pid=56239)[0m  [ 580]
[2m[36m(RayTrainWorker pid=56239)[0m  [ 582]
[2m[36m(RayTrainWorker pid=56239)[0m  [ 584]
[2m[36m(RayTrainWorker pid=56239)[0m 

[2m[36m(RayTrainWorker pid=56239)[0m  [ 546]
[2m[36m(RayTrainWorker pid=56239)[0m  [ 548]
[2m[36m(RayTrainWorker pid=56239)[0m  [ 550]
[2m[36m(RayTrainWorker pid=56239)[0m  [ 552]
[2m[36m(RayTrainWorker pid=56239)[0m  [ 554]
[2m[36m(RayTrainWorker pid=56239)[0m  [ 556]
[2m[36m(RayTrainWorker pid=56239)[0m  [ 558]
[2m[36m(RayTrainWorker pid=56239)[0m  [ 560]
[2m[36m(RayTrainWorker pid=56239)[0m  [ 562]
[2m[36m(RayTrainWorker pid=56239)[0m  [ 564]
[2m[36m(RayTrainWorker pid=56239)[0m  [ 566]
[2m[36m(RayTrainWorker pid=56239)[0m  [ 568]
[2m[36m(RayTrainWorker pid=56239)[0m  [ 570]
[2m[36m(RayTrainWorker pid=56239)[0m  [ 572]
[2m[36m(RayTrainWorker pid=56239)[0m  [ 574]
[2m[36m(RayTrainWorker pid=56239)[0m  [ 576]
[2m[36m(RayTrainWorker pid=56239)[0m  [ 578]
[2m[36m(RayTrainWorker pid=56239)[0m  [ 580]
[2m[36m(RayTrainWorker pid=56239)[0m  [ 582]
[2m[36m(RayTrainWorker pid=56239)[0m  [ 584]
[2m[36m(RayTrainWorker pid=56239)[0m 

[2m[36m(RayTrainWorker pid=56238)[0m  [ 920]
[2m[36m(RayTrainWorker pid=56238)[0m  [ 922]
[2m[36m(RayTrainWorker pid=56238)[0m  [ 924]
[2m[36m(RayTrainWorker pid=56238)[0m  [ 926]
[2m[36m(RayTrainWorker pid=56238)[0m  [ 928]
[2m[36m(RayTrainWorker pid=56238)[0m  [ 930]
[2m[36m(RayTrainWorker pid=56238)[0m  [ 932]
[2m[36m(RayTrainWorker pid=56238)[0m  [ 934]
[2m[36m(RayTrainWorker pid=56238)[0m  [ 936]
[2m[36m(RayTrainWorker pid=56238)[0m  [ 938]
[2m[36m(RayTrainWorker pid=56238)[0m  [ 940]
[2m[36m(RayTrainWorker pid=56238)[0m  [ 942]
[2m[36m(RayTrainWorker pid=56238)[0m  [ 944]
[2m[36m(RayTrainWorker pid=56238)[0m  [ 946]
[2m[36m(RayTrainWorker pid=56238)[0m  [ 948]
[2m[36m(RayTrainWorker pid=56238)[0m  [ 950]
[2m[36m(RayTrainWorker pid=56238)[0m  [ 952]
[2m[36m(RayTrainWorker pid=56238)[0m  [ 954]
[2m[36m(RayTrainWorker pid=56238)[0m  [ 956]
[2m[36m(RayTrainWorker pid=56238)[0m  [ 958]
[2m[36m(RayTrainWorker pid=56238)[0m 

[2m[36m(RayTrainWorker pid=56236)[0m  [ 670]
[2m[36m(RayTrainWorker pid=56236)[0m  [ 672]
[2m[36m(RayTrainWorker pid=56236)[0m  [ 674]
[2m[36m(RayTrainWorker pid=56236)[0m  [ 676]
[2m[36m(RayTrainWorker pid=56236)[0m  [ 678]
[2m[36m(RayTrainWorker pid=56236)[0m  [ 680]
[2m[36m(RayTrainWorker pid=56236)[0m  [ 682]
[2m[36m(RayTrainWorker pid=56236)[0m  [ 684]
[2m[36m(RayTrainWorker pid=56236)[0m  [ 686]
[2m[36m(RayTrainWorker pid=56236)[0m  [ 688]
[2m[36m(RayTrainWorker pid=56236)[0m  [ 690]
[2m[36m(RayTrainWorker pid=56236)[0m  [ 692]
[2m[36m(RayTrainWorker pid=56236)[0m  [ 694]
[2m[36m(RayTrainWorker pid=56236)[0m  [ 696]
[2m[36m(RayTrainWorker pid=56236)[0m  [ 698]
[2m[36m(RayTrainWorker pid=56236)[0m  [ 700]
[2m[36m(RayTrainWorker pid=56236)[0m  [ 702]
[2m[36m(RayTrainWorker pid=56236)[0m  [ 704]
[2m[36m(RayTrainWorker pid=56236)[0m  [ 706]
[2m[36m(RayTrainWorker pid=56236)[0m  [ 708]
[2m[36m(RayTrainWorker pid=56236)[0m 

[2m[36m(RayTrainWorker pid=56238)[0m  [ 918]
[2m[36m(RayTrainWorker pid=56238)[0m  [ 920]
[2m[36m(RayTrainWorker pid=56238)[0m  [ 922]
[2m[36m(RayTrainWorker pid=56238)[0m  [ 924]
[2m[36m(RayTrainWorker pid=56238)[0m  [ 926]
[2m[36m(RayTrainWorker pid=56238)[0m  [ 928]
[2m[36m(RayTrainWorker pid=56238)[0m  [ 930]
[2m[36m(RayTrainWorker pid=56238)[0m  [ 932]
[2m[36m(RayTrainWorker pid=56238)[0m  [ 934]
[2m[36m(RayTrainWorker pid=56238)[0m  [ 936]
[2m[36m(RayTrainWorker pid=56238)[0m  [ 938]
[2m[36m(RayTrainWorker pid=56238)[0m  [ 940]
[2m[36m(RayTrainWorker pid=56238)[0m  [ 942]
[2m[36m(RayTrainWorker pid=56238)[0m  [ 944]
[2m[36m(RayTrainWorker pid=56238)[0m  [ 946]
[2m[36m(RayTrainWorker pid=56238)[0m  [ 948]
[2m[36m(RayTrainWorker pid=56238)[0m  [ 950]
[2m[36m(RayTrainWorker pid=56238)[0m  [ 952]
[2m[36m(RayTrainWorker pid=56238)[0m  [ 954]
[2m[36m(RayTrainWorker pid=56238)[0m  [ 956]
[2m[36m(RayTrainWorker pid=56238)[0m 

[2m[36m(RayTrainWorker pid=56239)[0m  [ 540]
[2m[36m(RayTrainWorker pid=56239)[0m  [ 542]
[2m[36m(RayTrainWorker pid=56239)[0m  [ 544]
[2m[36m(RayTrainWorker pid=56239)[0m  [ 546]
[2m[36m(RayTrainWorker pid=56239)[0m  [ 548]
[2m[36m(RayTrainWorker pid=56239)[0m  [ 550]
[2m[36m(RayTrainWorker pid=56239)[0m  [ 552]
[2m[36m(RayTrainWorker pid=56239)[0m  [ 554]
[2m[36m(RayTrainWorker pid=56239)[0m  [ 556]
[2m[36m(RayTrainWorker pid=56239)[0m  [ 558]
[2m[36m(RayTrainWorker pid=56239)[0m  [ 560]
[2m[36m(RayTrainWorker pid=56239)[0m  [ 562]
[2m[36m(RayTrainWorker pid=56239)[0m  [ 564]
[2m[36m(RayTrainWorker pid=56239)[0m  [ 566]
[2m[36m(RayTrainWorker pid=56239)[0m  [ 568]
[2m[36m(RayTrainWorker pid=56239)[0m  [ 570]
[2m[36m(RayTrainWorker pid=56239)[0m  [ 572]
[2m[36m(RayTrainWorker pid=56239)[0m  [ 574]
[2m[36m(RayTrainWorker pid=56239)[0m  [ 576]
[2m[36m(RayTrainWorker pid=56239)[0m  [ 578]
[2m[36m(RayTrainWorker pid=56239)[0m 

[2m[36m(RayTrainWorker pid=56237)[0m  [ 784]
[2m[36m(RayTrainWorker pid=56237)[0m  [ 786]
[2m[36m(RayTrainWorker pid=56237)[0m  [ 788]
[2m[36m(RayTrainWorker pid=56237)[0m  [ 790]
[2m[36m(RayTrainWorker pid=56237)[0m  [ 792]
[2m[36m(RayTrainWorker pid=56237)[0m  [ 794]
[2m[36m(RayTrainWorker pid=56237)[0m  [ 796]
[2m[36m(RayTrainWorker pid=56237)[0m  [ 798]
[2m[36m(RayTrainWorker pid=56237)[0m  [ 800]
[2m[36m(RayTrainWorker pid=56237)[0m  [ 802]
[2m[36m(RayTrainWorker pid=56237)[0m  [ 804]
[2m[36m(RayTrainWorker pid=56237)[0m  [ 806]
[2m[36m(RayTrainWorker pid=56237)[0m  [ 808]
[2m[36m(RayTrainWorker pid=56237)[0m  [ 810]
[2m[36m(RayTrainWorker pid=56237)[0m  [ 812]
[2m[36m(RayTrainWorker pid=56237)[0m  [ 814]
[2m[36m(RayTrainWorker pid=56237)[0m  [ 816]
[2m[36m(RayTrainWorker pid=56237)[0m  [ 818]
[2m[36m(RayTrainWorker pid=56237)[0m  [ 820]
[2m[36m(RayTrainWorker pid=56237)[0m  [ 822]
[2m[36m(RayTrainWorker pid=56237)[0m 

[2m[36m(RayTrainWorker pid=56238)[0m  [ 908]
[2m[36m(RayTrainWorker pid=56238)[0m  [ 910]
[2m[36m(RayTrainWorker pid=56238)[0m  [ 912]
[2m[36m(RayTrainWorker pid=56238)[0m  [ 914]
[2m[36m(RayTrainWorker pid=56238)[0m  [ 916]
[2m[36m(RayTrainWorker pid=56238)[0m  [ 918]
[2m[36m(RayTrainWorker pid=56238)[0m  [ 920]
[2m[36m(RayTrainWorker pid=56238)[0m  [ 922]
[2m[36m(RayTrainWorker pid=56238)[0m  [ 924]
[2m[36m(RayTrainWorker pid=56238)[0m  [ 926]
[2m[36m(RayTrainWorker pid=56238)[0m  [ 928]
[2m[36m(RayTrainWorker pid=56238)[0m  [ 930]
[2m[36m(RayTrainWorker pid=56238)[0m  [ 932]
[2m[36m(RayTrainWorker pid=56238)[0m  [ 934]
[2m[36m(RayTrainWorker pid=56238)[0m  [ 936]
[2m[36m(RayTrainWorker pid=56238)[0m  [ 938]
[2m[36m(RayTrainWorker pid=56238)[0m  [ 940]
[2m[36m(RayTrainWorker pid=56238)[0m  [ 942]
[2m[36m(RayTrainWorker pid=56238)[0m  [ 944]
[2m[36m(RayTrainWorker pid=56238)[0m  [ 946]
[2m[36m(RayTrainWorker pid=56238)[0m 

[2m[36m(RayTrainWorker pid=56238)[0m  [ 900]
[2m[36m(RayTrainWorker pid=56238)[0m  [ 902]
[2m[36m(RayTrainWorker pid=56238)[0m  [ 904]
[2m[36m(RayTrainWorker pid=56238)[0m  [ 906]
[2m[36m(RayTrainWorker pid=56238)[0m  [ 908]
[2m[36m(RayTrainWorker pid=56238)[0m  [ 910]
[2m[36m(RayTrainWorker pid=56238)[0m  [ 912]
[2m[36m(RayTrainWorker pid=56238)[0m  [ 914]
[2m[36m(RayTrainWorker pid=56238)[0m  [ 916]
[2m[36m(RayTrainWorker pid=56238)[0m  [ 918]
[2m[36m(RayTrainWorker pid=56238)[0m  [ 920]
[2m[36m(RayTrainWorker pid=56238)[0m  [ 922]
[2m[36m(RayTrainWorker pid=56238)[0m  [ 924]
[2m[36m(RayTrainWorker pid=56238)[0m  [ 926]
[2m[36m(RayTrainWorker pid=56238)[0m  [ 928]
[2m[36m(RayTrainWorker pid=56238)[0m  [ 930]
[2m[36m(RayTrainWorker pid=56238)[0m  [ 932]
[2m[36m(RayTrainWorker pid=56238)[0m  [ 934]
[2m[36m(RayTrainWorker pid=56238)[0m  [ 936]
[2m[36m(RayTrainWorker pid=56238)[0m  [ 938]
[2m[36m(RayTrainWorker pid=56238)[0m 

[2m[36m(RayTrainWorker pid=56236)[0m  [ 280]
[2m[36m(RayTrainWorker pid=56236)[0m  [ 282]
[2m[36m(RayTrainWorker pid=56236)[0m  [ 284]
[2m[36m(RayTrainWorker pid=56236)[0m  [ 286]
[2m[36m(RayTrainWorker pid=56236)[0m  [ 288]
[2m[36m(RayTrainWorker pid=56236)[0m  [ 290]
[2m[36m(RayTrainWorker pid=56236)[0m  [ 292]
[2m[36m(RayTrainWorker pid=56236)[0m  [ 294]
[2m[36m(RayTrainWorker pid=56236)[0m  [ 296]
[2m[36m(RayTrainWorker pid=56236)[0m  [ 298]
[2m[36m(RayTrainWorker pid=56236)[0m  [ 300]
[2m[36m(RayTrainWorker pid=56236)[0m  [ 302]
[2m[36m(RayTrainWorker pid=56236)[0m  [ 304]
[2m[36m(RayTrainWorker pid=56236)[0m  [ 306]
[2m[36m(RayTrainWorker pid=56236)[0m  [ 308]
[2m[36m(RayTrainWorker pid=56236)[0m  [ 310]
[2m[36m(RayTrainWorker pid=56236)[0m  [ 312]
[2m[36m(RayTrainWorker pid=56236)[0m  [ 314]
[2m[36m(RayTrainWorker pid=56236)[0m  [ 316]
[2m[36m(RayTrainWorker pid=56236)[0m  [ 318]
[2m[36m(RayTrainWorker pid=56236)[0m 

[2m[36m(RayTrainWorker pid=56239)[0m  [1034]
[2m[36m(RayTrainWorker pid=56239)[0m  [1036]
[2m[36m(RayTrainWorker pid=56239)[0m  [1038]
[2m[36m(RayTrainWorker pid=56239)[0m  [1040]
[2m[36m(RayTrainWorker pid=56239)[0m  [1042]
[2m[36m(RayTrainWorker pid=56239)[0m  [1044]
[2m[36m(RayTrainWorker pid=56239)[0m  [1046]
[2m[36m(RayTrainWorker pid=56239)[0m  [1048]
[2m[36m(RayTrainWorker pid=56239)[0m  [1050]
[2m[36m(RayTrainWorker pid=56239)[0m  [1052]
[2m[36m(RayTrainWorker pid=56239)[0m  [1054]
[2m[36m(RayTrainWorker pid=56239)[0m  [1056]
[2m[36m(RayTrainWorker pid=56239)[0m  [1058]
[2m[36m(RayTrainWorker pid=56239)[0m  [1060]
[2m[36m(RayTrainWorker pid=56239)[0m  [1062]
[2m[36m(RayTrainWorker pid=56239)[0m  [1064]
[2m[36m(RayTrainWorker pid=56239)[0m  [1066]
[2m[36m(RayTrainWorker pid=56239)[0m  [1068]
[2m[36m(RayTrainWorker pid=56239)[0m  [1070]
[2m[36m(RayTrainWorker pid=56239)[0m  [1072]
[2m[36m(RayTrainWorker pid=56239)[0m 

[2m[36m(RayTrainWorker pid=56238)[0m  [1746]
[2m[36m(RayTrainWorker pid=56238)[0m  [1748]
[2m[36m(RayTrainWorker pid=56238)[0m  [1750]
[2m[36m(RayTrainWorker pid=56238)[0m  [1752]
[2m[36m(RayTrainWorker pid=56238)[0m  [1754]
[2m[36m(RayTrainWorker pid=56238)[0m  [1756]
[2m[36m(RayTrainWorker pid=56238)[0m  [1758]
[2m[36m(RayTrainWorker pid=56238)[0m  [1760]
[2m[36m(RayTrainWorker pid=56238)[0m  [1762]
[2m[36m(RayTrainWorker pid=56238)[0m  [1764]
[2m[36m(RayTrainWorker pid=56238)[0m  [1766]
[2m[36m(RayTrainWorker pid=56238)[0m  [1768]
[2m[36m(RayTrainWorker pid=56238)[0m  [1770]
[2m[36m(RayTrainWorker pid=56238)[0m  [1772]
[2m[36m(RayTrainWorker pid=56238)[0m  [1774]
[2m[36m(RayTrainWorker pid=56238)[0m  [1776]
[2m[36m(RayTrainWorker pid=56238)[0m  [1778]
[2m[36m(RayTrainWorker pid=56238)[0m  [1780]
[2m[36m(RayTrainWorker pid=56238)[0m  [1782]
[2m[36m(RayTrainWorker pid=56238)[0m  [1784]
[2m[36m(RayTrainWorker pid=56238)[0m 

#### Streaming Ingest

In [13]:
import ray
from ray.air import session
from ray.data import DatasetPipeline
from ray.data.preprocessors import BatchMapper
from ray.train.torch import TorchTrainer
from ray.air.config import ScalingConfig, DatasetConfig

In [None]:
# Same toy preprocessor as in the bulk-ingest example: double every value.
preprocessor = BatchMapper(fn=lambda batch: batch * 2)

def train_loop_per_worker():
    """Training loop run on each worker for the streaming-ingest example.

    Fetches the worker's 'train' shard through the AIR session, walks the
    ten epochs produced by ``iter_epochs(10)`` printing every batch of each
    epoch, and finally prints the shard's stats.
    """
    pipeline: DatasetPipeline = session.get_dataset_shard('train')

    def consume(epoch_data):
        # Print every batch belonging to a single epoch.
        for batch in epoch_data.iter_batches():
            print('training on batch', batch)

    for epoch_data in pipeline.iter_epochs(10):
        consume(epoch_data)

    print(pipeline.stats())

# Streaming window size in bytes. 200 is deliberately tiny so this toy
# example produces several windows; real workloads would use a far larger
# value.
N = 200

# Streaming-ingest trainer. Compared with the bulk-ingest example, the
# 'train' dataset is additionally given a DatasetConfig that turns on the
# streaming API, so each worker's shard is a DatasetPipeline (matching the
# iter_epochs() usage in train_loop_per_worker) instead of a Dataset.
trainer = TorchTrainer(
    train_loop_per_worker,
    scaling_config=ScalingConfig(num_workers=1),
    datasets={'train': ray.data.range_tensor(1000)},
    dataset_config={
        'train': DatasetConfig(use_stream_api=True, stream_window_size=N),
    },
    preprocessor=preprocessor,
)

# Kept as the last expression so the cell displays the returned Result.
trainer.fit()

### 2. Shuffling data