Skip to content

Commit

Permalink
MultiProcDataset, worker proc, late load dataset
Browse files Browse the repository at this point in the history
  • Loading branch information
albertz committed Jan 17, 2024
1 parent 8825a11 commit 402347a
Showing 1 changed file with 4 additions and 2 deletions.
6 changes: 4 additions & 2 deletions returnn/datasets/multi_proc.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from __future__ import annotations
from typing import Optional, Any, Dict, List
import sys
from .basic import init_dataset, DatasetSeq
from .basic import init_dataset, Dataset, DatasetSeq
from .cached2 import CachedDataset2
from returnn.util.basic import try_run
import multiprocessing as mp
Expand Down Expand Up @@ -216,7 +216,7 @@ def _worker_proc_loop(
with open("/proc/self/comm", "w") as f:
f.write(f"MPD worker {worker_index}")

dataset = init_dataset(dataset_dict)
dataset: Optional[Dataset] = None

got_init_seq_order = False
cache = [] # type: List[DatasetSeq]
Expand Down Expand Up @@ -289,6 +289,8 @@ def _get(seq_idx: int) -> Optional[DatasetSeq]:
elif msg == "init_seq_order":
msg_, seq_order_ = seq_order.recv()
assert msg_ == "seq_order_shard"
if dataset is None:
dataset = init_dataset(dataset_dict)
dataset.init_seq_order(seq_order=seq_order_, **kwargs)
got_init_seq_order = True
next_seq_idx = 0
Expand Down

0 comments on commit 402347a

Please sign in to comment.