From 17c906542688bcefbbe0b23186e1ac045258531e Mon Sep 17 00:00:00 2001 From: Devasena Inupakutika Date: Tue, 12 May 2026 04:00:54 +0000 Subject: [PATCH 1/2] cli_parser: guard --file/--object consolidation for non-benchmark subcommands reports/history/lockfile subparsers do not call add_storage_type_arguments(), so their Namespace has no .file or .object attribute. The unconditional read and delete in parse_arguments() crashed with AttributeError. Gate the consolidation on attribute presence; downstream code already uses getattr(args, 'data_access_protocol', None). Fixes #367 Signed-off-by: Devasena Inupakutika --- mlpstorage_py/cli_parser.py | 21 ++- requirements.txt | 251 ++++++++++++++++++++++++++++++++++++ tests/unit/test_cli.py | 87 +++++++++++++ 3 files changed, 352 insertions(+), 7 deletions(-) create mode 100644 requirements.txt diff --git a/mlpstorage_py/cli_parser.py b/mlpstorage_py/cli_parser.py index af32669f..cafecb98 100755 --- a/mlpstorage_py/cli_parser.py +++ b/mlpstorage_py/cli_parser.py @@ -115,13 +115,20 @@ def parse_arguments(): if hasattr(parsed_args, 'config_file') and parsed_args.config_file: parsed_args = apply_yaml_config_overrides(parsed_args) - # Consolidate the data access protocol into a single field - if parsed_args.file: - parsed_args.data_access_protocol = "file" - else: - parsed_args.data_access_protocol = parsed_args.object - del parsed_args.file - del parsed_args.object + # Consolidate the data access protocol into a single field. + # The --file / --object flags are only defined on benchmark subcommands + # that call add_storage_type_arguments() (training, checkpointing, + # vectordb, kvcache). Other subcommands (reports, history, lockfile) + # do not define them, so guard the consolidation on attribute presence. + if hasattr(parsed_args, "file") or hasattr(parsed_args, "object"): + if getattr(parsed_args, "file", False): + parsed_args.data_access_protocol = "file" + else: + parsed_args.data_access_protocol = getattr(parsed_args, "object", None) + # Clean up the raw flags so downstream code uses data_access_protocol. + for _attr in ("file", "object"): + if hasattr(parsed_args, _attr): + delattr(parsed_args, _attr) """ print(f"Arguments found: {parsed_args}") diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 00000000..b132b220 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,251 @@ +# This file was autogenerated by uv via the following command: +# uv pip compile pyproject.toml -o requirements.txt --universal +absl-py==2.4.0 + # via + # keras + # tensorboard + # tensorflow +antlr4-python3-runtime==4.9.3 + # via + # hydra-core + # omegaconf +argon2-cffi==25.1.0 + # via minio +argon2-cffi-bindings==25.1.0 + # via argon2-cffi +astunparse==1.6.3 + # via tensorflow +certifi==2026.4.22 + # via + # minio + # requests +cffi==2.0.0 + # via argon2-cffi-bindings +charset-normalizer==3.4.7 + # via requests +cuda-bindings==13.2.0 ; sys_platform == 'linux' + # via torch +cuda-pathfinder==1.5.4 ; sys_platform == 'linux' + # via cuda-bindings +cuda-toolkit==13.0.2 ; sys_platform == 'linux' + # via torch +dgen-py==0.2.4 + # via dlio-benchmark +dlio-benchmark @ git+https://github.com/russfellows/dlio_benchmark.git@842fb9b0bd9d26c773433b4d0805922040206b50 + # via mlpstorage (pyproject.toml) +filelock==3.29.0 + # via torch +flatbuffers==25.12.19 + # via tensorflow +fsspec==2026.4.0 + # via torch +gast==0.7.0 + # via tensorflow +google-pasta==0.2.0 + # via tensorflow +grpcio==1.80.0 + # via + # tensorboard + # tensorflow +h5py==3.16.0 + # via + # dlio-benchmark + # keras + # tensorflow +hydra-core==1.3.2 + # via dlio-benchmark +idna==3.14 + # via requests +jinja2==3.1.6 + # via torch +keras==3.14.1 + # via tensorflow +libclang==18.1.1 + # via tensorflow +markdown==3.10.2 + # via tensorboard +markdown-it-py==4.2.0 + # via rich +markupsafe==3.0.3 + # via + # jinja2 + # werkzeug +mdurl==0.1.2 + # via markdown-it-py +minio==7.2.20 + # via mlpstorage (pyproject.toml) +ml-dtypes==0.5.4 + # via + # keras + # tensorflow +mpi4py==4.1.1 + # via dlio-benchmark +mpmath==1.3.0 + # via sympy +namex==0.1.0 + # via keras +networkx==3.6.1 + # via torch +numpy==2.4.4 + # via + # dlio-benchmark + # h5py + # keras + # ml-dtypes + # pandas + # s3dlio + # tensorboard + # tensorflow +nvidia-cublas==13.1.0.3 ; sys_platform == 'linux' + # via + # cuda-toolkit + # nvidia-cudnn-cu13 + # nvidia-cusolver +nvidia-cuda-cupti==13.0.85 ; sys_platform == 'linux' + # via cuda-toolkit +nvidia-cuda-nvrtc==13.0.88 ; sys_platform == 'linux' + # via cuda-toolkit +nvidia-cuda-runtime==13.0.96 ; sys_platform == 'linux' + # via cuda-toolkit +nvidia-cudnn-cu13==9.19.0.56 ; sys_platform == 'linux' + # via torch +nvidia-cufft==12.0.0.61 ; sys_platform == 'linux' + # via cuda-toolkit +nvidia-cufile==1.15.1.6 ; sys_platform == 'linux' + # via cuda-toolkit +nvidia-curand==10.4.0.35 ; sys_platform == 'linux' + # via cuda-toolkit +nvidia-cusolver==12.0.4.66 ; sys_platform == 'linux' + # via cuda-toolkit +nvidia-cusparse==12.6.3.3 ; sys_platform == 'linux' + # via + # cuda-toolkit + # nvidia-cusolver +nvidia-cusparselt-cu13==0.8.0 ; sys_platform == 'linux' + # via torch +nvidia-nccl-cu13==2.28.9 ; sys_platform == 'linux' + # via torch +nvidia-nvjitlink==13.0.88 ; sys_platform == 'linux' + # via + # cuda-toolkit + # nvidia-cufft + # nvidia-cusolver + # nvidia-cusparse +nvidia-nvshmem-cu13==3.4.5 ; sys_platform == 'linux' + # via torch +nvidia-nvtx==13.0.85 ; sys_platform == 'linux' + # via cuda-toolkit +omegaconf==2.3.0 + # via + # dlio-benchmark + # hydra-core +opt-einsum==3.4.0 + # via tensorflow +optree==0.19.1 + # via keras +packaging==26.2 + # via + # mlpstorage (pyproject.toml) + # hydra-core + # keras + # tensorboard + # tensorflow + # wheel +pandas==3.0.3 + # via dlio-benchmark +pillow==12.2.0 + # via + # dlio-benchmark + # tensorboard +protobuf==7.34.1 + # via + # tensorboard + # tensorflow +psutil==7.2.2 + # via + # mlpstorage (pyproject.toml) + # dlio-benchmark +pyarrow==24.0.0 + # via + # mlpstorage (pyproject.toml) + # dlio-benchmark +pycparser==3.0 ; implementation_name != 'PyPy' + # via cffi +pycryptodome==3.23.0 + # via minio +pydftracer==2.0.2 + # via dlio-benchmark +pygments==2.20.0 + # via rich +python-dateutil==2.9.0.post0 + # via pandas +python-dotenv==1.2.2 + # via mlpstorage (pyproject.toml) +pyyaml==6.0.3 + # via + # mlpstorage (pyproject.toml) + # dlio-benchmark + # omegaconf +requests==2.34.0 + # via tensorflow +rich==15.0.0 + # via + # mlpstorage (pyproject.toml) + # keras +s3dlio==0.9.98 + # via + # mlpstorage (pyproject.toml) + # dlio-benchmark +s3torchconnector==1.5.0 + # via mlpstorage (pyproject.toml) +s3torchconnectorclient==1.5.0 + # via s3torchconnector +setuptools==81.0.0 + # via + # tensorboard + # tensorflow + # torch +six==1.17.0 + # via + # astunparse + # google-pasta + # python-dateutil + # tensorflow +sympy==1.14.0 + # via torch +tensorboard==2.20.0 + # via tensorflow +tensorboard-data-server==0.7.2 + # via tensorboard +tensorflow==2.20.0 + # via dlio-benchmark +termcolor==3.3.0 + # via tensorflow +torch==2.11.0 + # via + # dlio-benchmark + # s3torchconnector +triton==3.6.0 ; sys_platform == 'linux' + # via torch +typing-extensions==4.15.0 + # via + # dlio-benchmark + # grpcio + # minio + # optree + # tensorflow + # torch +tzdata==2026.2 ; sys_platform == 'emscripten' or sys_platform == 'win32' + # via pandas +urllib3==2.7.0 + # via + # minio + # requests +werkzeug==3.1.8 + # via tensorboard +wheel==0.47.0 + # via astunparse +wrapt==2.1.2 + # via tensorflow +zstandard==0.25.0 + # via dgen-py diff --git a/tests/unit/test_cli.py b/tests/unit/test_cli.py index 236a2f5b..43f5206d 100755 --- a/tests/unit/test_cli.py +++ b/tests/unit/test_cli.py @@ -722,3 +722,90 @@ def test_skips_none_values(self, tmp_path): result = apply_yaml_config_overrides(args) assert result.debug is True # Should not be overwritten assert result.loops == 5 + +class TestParseArgumentsStorageFlagConsolidation: + """Regression tests for issue #367. + + The CLI parser must not crash when a subcommand that doesn't define + --file / --object (reports, history, lockfile) is invoked, and must + still correctly consolidate those flags into data_access_protocol on + benchmark subcommands that do define them (training, checkpointing, + vectordb, kvcache). + """ + + @staticmethod + def _run(monkeypatch, argv): + """Invoke parse_arguments() with a synthetic sys.argv.""" + from mlpstorage_py.cli_parser import parse_arguments + monkeypatch.setattr(sys, "argv", argv) + return parse_arguments() + + # --- non-benchmark subcommands: must not raise AttributeError --- + + def test_reportgen_does_not_crash_without_storage_flags(self, monkeypatch, tmp_path): + """Regression test for #367: `reports reportgen` must parse cleanly.""" + args = self._run( + monkeypatch, + ["mlpstorage", "reports", "reportgen", "--results-dir", str(tmp_path)], + ) + assert args.program == "reports" + assert args.command == "reportgen" + assert not hasattr(args, "file") + assert not hasattr(args, "object") + + def test_history_does_not_crash_without_storage_flags(self, monkeypatch): + """`history show` must parse cleanly (no --file/--object on this parser).""" + args = self._run(monkeypatch, ["mlpstorage", "history", "show"]) + assert args.program == "history" + assert args.command == "show" + assert not hasattr(args, "file") + assert not hasattr(args, "object") + + def test_lockfile_does_not_crash_without_storage_flags(self, monkeypatch): + """`lockfile generate` must parse cleanly (no --file/--object on this parser).""" + args = self._run(monkeypatch, ["mlpstorage", "lockfile", "generate"]) + assert args.program == "lockfile" + assert not hasattr(args, "file") + assert not hasattr(args, "object") + + # --- benchmark subcommands: existing consolidation must still work --- + + def test_training_run_consolidates_file_flag(self, monkeypatch, tmp_path): + """`training run --file` should set data_access_protocol='file'.""" + args = self._run( + monkeypatch, + [ + "mlpstorage", "training", "run", + "--model", "unet3d", + "--hosts", "localhost", + "--num-accelerators", "1", + "--accelerator-type", "h100", + "--client-host-memory-in-gb", "64", + "--data-dir", str(tmp_path / "data"), + "--results-dir", str(tmp_path / "results"), + "--file", + ], + ) + assert args.data_access_protocol == "file" + assert not hasattr(args, "file") + assert not hasattr(args, "object") + + def test_training_run_consolidates_object_flag(self, monkeypatch, tmp_path): + """`training run --object s3` should set data_access_protocol='s3'.""" + args = self._run( + monkeypatch, + [ + "mlpstorage", "training", "run", + "--model", "unet3d", + "--hosts", "localhost", + "--num-accelerators", "1", + "--accelerator-type", "h100", + "--client-host-memory-in-gb", "64", + "--data-dir", str(tmp_path / "data"), + "--results-dir", str(tmp_path / "results"), + "--object", "s3", + ], + ) + assert args.data_access_protocol == "s3" + assert not hasattr(args, "file") + assert not hasattr(args, "object") From b54b6526082050bf666250be2675522e0ac80682 Mon Sep 17 00:00:00 2001 From: Devasena Inupakutika Date: Tue, 12 May 2026 04:11:06 +0000 Subject: [PATCH 2/2] Remove unwanted file --- requirements.txt | 251 ----------------------------------------------- 1 file changed, 251 deletions(-) delete mode 100644 requirements.txt diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index b132b220..00000000 --- a/requirements.txt +++ /dev/null @@ -1,251 +0,0 @@ -# This file was autogenerated by uv via the following command: -# uv pip compile pyproject.toml -o requirements.txt --universal -absl-py==2.4.0 - # via - # keras - # tensorboard - # tensorflow -antlr4-python3-runtime==4.9.3 - # via - # hydra-core - # omegaconf -argon2-cffi==25.1.0 - # via minio -argon2-cffi-bindings==25.1.0 - # via argon2-cffi -astunparse==1.6.3 - # via tensorflow -certifi==2026.4.22 - # via - # minio - # requests -cffi==2.0.0 - # via argon2-cffi-bindings -charset-normalizer==3.4.7 - # via requests -cuda-bindings==13.2.0 ; sys_platform == 'linux' - # via torch -cuda-pathfinder==1.5.4 ; sys_platform == 'linux' - # via cuda-bindings -cuda-toolkit==13.0.2 ; sys_platform == 'linux' - # via torch -dgen-py==0.2.4 - # via dlio-benchmark -dlio-benchmark @ git+https://github.com/russfellows/dlio_benchmark.git@842fb9b0bd9d26c773433b4d0805922040206b50 - # via mlpstorage (pyproject.toml) -filelock==3.29.0 - # via torch -flatbuffers==25.12.19 - # via tensorflow -fsspec==2026.4.0 - # via torch -gast==0.7.0 - # via tensorflow -google-pasta==0.2.0 - # via tensorflow -grpcio==1.80.0 - # via - # tensorboard - # tensorflow -h5py==3.16.0 - # via - # dlio-benchmark - # keras - # tensorflow -hydra-core==1.3.2 - # via dlio-benchmark -idna==3.14 - # via requests -jinja2==3.1.6 - # via torch -keras==3.14.1 - # via tensorflow -libclang==18.1.1 - # via tensorflow -markdown==3.10.2 - # via tensorboard -markdown-it-py==4.2.0 - # via rich -markupsafe==3.0.3 - # via - # jinja2 - # werkzeug -mdurl==0.1.2 - # via markdown-it-py -minio==7.2.20 - # via mlpstorage (pyproject.toml) -ml-dtypes==0.5.4 - # via - # keras - # tensorflow -mpi4py==4.1.1 - # via dlio-benchmark -mpmath==1.3.0 - # via sympy -namex==0.1.0 - # via keras -networkx==3.6.1 - # via torch -numpy==2.4.4 - # via - # dlio-benchmark - # h5py - # keras - # ml-dtypes - # pandas - # s3dlio - # tensorboard - # tensorflow -nvidia-cublas==13.1.0.3 ; sys_platform == 'linux' - # via - # cuda-toolkit - # nvidia-cudnn-cu13 - # nvidia-cusolver -nvidia-cuda-cupti==13.0.85 ; sys_platform == 'linux' - # via cuda-toolkit -nvidia-cuda-nvrtc==13.0.88 ; sys_platform == 'linux' - # via cuda-toolkit -nvidia-cuda-runtime==13.0.96 ; sys_platform == 'linux' - # via cuda-toolkit -nvidia-cudnn-cu13==9.19.0.56 ; sys_platform == 'linux' - # via torch -nvidia-cufft==12.0.0.61 ; sys_platform == 'linux' - # via cuda-toolkit -nvidia-cufile==1.15.1.6 ; sys_platform == 'linux' - # via cuda-toolkit -nvidia-curand==10.4.0.35 ; sys_platform == 'linux' - # via cuda-toolkit -nvidia-cusolver==12.0.4.66 ; sys_platform == 'linux' - # via cuda-toolkit -nvidia-cusparse==12.6.3.3 ; sys_platform == 'linux' - # via - # cuda-toolkit - # nvidia-cusolver -nvidia-cusparselt-cu13==0.8.0 ; sys_platform == 'linux' - # via torch -nvidia-nccl-cu13==2.28.9 ; sys_platform == 'linux' - # via torch -nvidia-nvjitlink==13.0.88 ; sys_platform == 'linux' - # via - # cuda-toolkit - # nvidia-cufft - # nvidia-cusolver - # nvidia-cusparse -nvidia-nvshmem-cu13==3.4.5 ; sys_platform == 'linux' - # via torch -nvidia-nvtx==13.0.85 ; sys_platform == 'linux' - # via cuda-toolkit -omegaconf==2.3.0 - # via - # dlio-benchmark - # hydra-core -opt-einsum==3.4.0 - # via tensorflow -optree==0.19.1 - # via keras -packaging==26.2 - # via - # mlpstorage (pyproject.toml) - # hydra-core - # keras - # tensorboard - # tensorflow - # wheel -pandas==3.0.3 - # via dlio-benchmark -pillow==12.2.0 - # via - # dlio-benchmark - # tensorboard -protobuf==7.34.1 - # via - # tensorboard - # tensorflow -psutil==7.2.2 - # via - # mlpstorage (pyproject.toml) - # dlio-benchmark -pyarrow==24.0.0 - # via - # mlpstorage (pyproject.toml) - # dlio-benchmark -pycparser==3.0 ; implementation_name != 'PyPy' - # via cffi -pycryptodome==3.23.0 - # via minio -pydftracer==2.0.2 - # via dlio-benchmark -pygments==2.20.0 - # via rich -python-dateutil==2.9.0.post0 - # via pandas -python-dotenv==1.2.2 - # via mlpstorage (pyproject.toml) -pyyaml==6.0.3 - # via - # mlpstorage (pyproject.toml) - # dlio-benchmark - # omegaconf -requests==2.34.0 - # via tensorflow -rich==15.0.0 - # via - # mlpstorage (pyproject.toml) - # keras -s3dlio==0.9.98 - # via - # mlpstorage (pyproject.toml) - # dlio-benchmark -s3torchconnector==1.5.0 - # via mlpstorage (pyproject.toml) -s3torchconnectorclient==1.5.0 - # via s3torchconnector -setuptools==81.0.0 - # via - # tensorboard - # tensorflow - # torch -six==1.17.0 - # via - # astunparse - # google-pasta - # python-dateutil - # tensorflow -sympy==1.14.0 - # via torch -tensorboard==2.20.0 - # via tensorflow -tensorboard-data-server==0.7.2 - # via tensorboard -tensorflow==2.20.0 - # via dlio-benchmark -termcolor==3.3.0 - # via tensorflow -torch==2.11.0 - # via - # dlio-benchmark - # s3torchconnector -triton==3.6.0 ; sys_platform == 'linux' - # via torch -typing-extensions==4.15.0 - # via - # dlio-benchmark - # grpcio - # minio - # optree - # tensorflow - # torch -tzdata==2026.2 ; sys_platform == 'emscripten' or sys_platform == 'win32' - # via pandas -urllib3==2.7.0 - # via - # minio - # requests -werkzeug==3.1.8 - # via tensorboard -wheel==0.47.0 - # via astunparse -wrapt==2.1.2 - # via tensorflow -zstandard==0.25.0 - # via dgen-py