From 17c906542688bcefbbe0b23186e1ac045258531e Mon Sep 17 00:00:00 2001
From: Devasena Inupakutika <devasena.i@samsung.com>
Date: Tue, 12 May 2026 04:00:54 +0000
Subject: [PATCH 1/2] cli_parser: guard --file/--object consolidation for
 non-benchmark subcommands

The reports, history, and lockfile subparsers do not call
add_storage_type_arguments(), so their Namespace has no .file or .object
attribute. The unconditional read and delete of those attributes in
parse_arguments() therefore crashed with AttributeError. Gate the
consolidation on attribute presence; for these subcommands
data_access_protocol is left unset, which is safe because downstream code
already reads it via getattr(args, 'data_access_protocol', None).

   Fixes #367

Signed-off-by: Devasena Inupakutika <devasena.i@samsung.com>
---
 mlpstorage_py/cli_parser.py |  21 ++-
 requirements.txt            | 251 ++++++++++++++++++++++++++++++++++++
 tests/unit/test_cli.py      |  87 +++++++++++++
 3 files changed, 352 insertions(+), 7 deletions(-)
 create mode 100644 requirements.txt

diff --git a/mlpstorage_py/cli_parser.py b/mlpstorage_py/cli_parser.py
index af32669f..cafecb98 100755
--- a/mlpstorage_py/cli_parser.py
+++ b/mlpstorage_py/cli_parser.py
@@ -115,13 +115,20 @@ def parse_arguments():
     if hasattr(parsed_args, 'config_file') and parsed_args.config_file:
         parsed_args = apply_yaml_config_overrides(parsed_args)
 
-    # Consolidate the data access protocol into a single field
-    if parsed_args.file:
-        parsed_args.data_access_protocol = "file"
-    else:
-        parsed_args.data_access_protocol = parsed_args.object
-    del parsed_args.file
-    del parsed_args.object
+    # Consolidate the data access protocol into a single field.
+    # The --file / --object flags are only defined on benchmark subcommands
+    # that call add_storage_type_arguments() (training, checkpointing,
+    # vectordb, kvcache). Other subcommands (reports, history, lockfile)
+    # do not define them, so guard the consolidation on attribute presence.
+    if hasattr(parsed_args, "file") or hasattr(parsed_args, "object"):
+        if getattr(parsed_args, "file", False):
+            parsed_args.data_access_protocol = "file"
+        else:
+            parsed_args.data_access_protocol = getattr(parsed_args, "object", None)
+        # Clean up the raw flags so downstream code uses data_access_protocol.
+        for _attr in ("file", "object"):
+            if hasattr(parsed_args, _attr):
+                delattr(parsed_args, _attr)
 
     """
     print(f"Arguments found: {parsed_args}")
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 00000000..b132b220
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,251 @@
+# This file was autogenerated by uv via the following command:
+#    uv pip compile pyproject.toml -o requirements.txt --universal
+absl-py==2.4.0
+    # via
+    #   keras
+    #   tensorboard
+    #   tensorflow
+antlr4-python3-runtime==4.9.3
+    # via
+    #   hydra-core
+    #   omegaconf
+argon2-cffi==25.1.0
+    # via minio
+argon2-cffi-bindings==25.1.0
+    # via argon2-cffi
+astunparse==1.6.3
+    # via tensorflow
+certifi==2026.4.22
+    # via
+    #   minio
+    #   requests
+cffi==2.0.0
+    # via argon2-cffi-bindings
+charset-normalizer==3.4.7
+    # via requests
+cuda-bindings==13.2.0 ; sys_platform == 'linux'
+    # via torch
+cuda-pathfinder==1.5.4 ; sys_platform == 'linux'
+    # via cuda-bindings
+cuda-toolkit==13.0.2 ; sys_platform == 'linux'
+    # via torch
+dgen-py==0.2.4
+    # via dlio-benchmark
+dlio-benchmark @ git+https://github.com/russfellows/dlio_benchmark.git@842fb9b0bd9d26c773433b4d0805922040206b50
+    # via mlpstorage (pyproject.toml)
+filelock==3.29.0
+    # via torch
+flatbuffers==25.12.19
+    # via tensorflow
+fsspec==2026.4.0
+    # via torch
+gast==0.7.0
+    # via tensorflow
+google-pasta==0.2.0
+    # via tensorflow
+grpcio==1.80.0
+    # via
+    #   tensorboard
+    #   tensorflow
+h5py==3.16.0
+    # via
+    #   dlio-benchmark
+    #   keras
+    #   tensorflow
+hydra-core==1.3.2
+    # via dlio-benchmark
+idna==3.14
+    # via requests
+jinja2==3.1.6
+    # via torch
+keras==3.14.1
+    # via tensorflow
+libclang==18.1.1
+    # via tensorflow
+markdown==3.10.2
+    # via tensorboard
+markdown-it-py==4.2.0
+    # via rich
+markupsafe==3.0.3
+    # via
+    #   jinja2
+    #   werkzeug
+mdurl==0.1.2
+    # via markdown-it-py
+minio==7.2.20
+    # via mlpstorage (pyproject.toml)
+ml-dtypes==0.5.4
+    # via
+    #   keras
+    #   tensorflow
+mpi4py==4.1.1
+    # via dlio-benchmark
+mpmath==1.3.0
+    # via sympy
+namex==0.1.0
+    # via keras
+networkx==3.6.1
+    # via torch
+numpy==2.4.4
+    # via
+    #   dlio-benchmark
+    #   h5py
+    #   keras
+    #   ml-dtypes
+    #   pandas
+    #   s3dlio
+    #   tensorboard
+    #   tensorflow
+nvidia-cublas==13.1.0.3 ; sys_platform == 'linux'
+    # via
+    #   cuda-toolkit
+    #   nvidia-cudnn-cu13
+    #   nvidia-cusolver
+nvidia-cuda-cupti==13.0.85 ; sys_platform == 'linux'
+    # via cuda-toolkit
+nvidia-cuda-nvrtc==13.0.88 ; sys_platform == 'linux'
+    # via cuda-toolkit
+nvidia-cuda-runtime==13.0.96 ; sys_platform == 'linux'
+    # via cuda-toolkit
+nvidia-cudnn-cu13==9.19.0.56 ; sys_platform == 'linux'
+    # via torch
+nvidia-cufft==12.0.0.61 ; sys_platform == 'linux'
+    # via cuda-toolkit
+nvidia-cufile==1.15.1.6 ; sys_platform == 'linux'
+    # via cuda-toolkit
+nvidia-curand==10.4.0.35 ; sys_platform == 'linux'
+    # via cuda-toolkit
+nvidia-cusolver==12.0.4.66 ; sys_platform == 'linux'
+    # via cuda-toolkit
+nvidia-cusparse==12.6.3.3 ; sys_platform == 'linux'
+    # via
+    #   cuda-toolkit
+    #   nvidia-cusolver
+nvidia-cusparselt-cu13==0.8.0 ; sys_platform == 'linux'
+    # via torch
+nvidia-nccl-cu13==2.28.9 ; sys_platform == 'linux'
+    # via torch
+nvidia-nvjitlink==13.0.88 ; sys_platform == 'linux'
+    # via
+    #   cuda-toolkit
+    #   nvidia-cufft
+    #   nvidia-cusolver
+    #   nvidia-cusparse
+nvidia-nvshmem-cu13==3.4.5 ; sys_platform == 'linux'
+    # via torch
+nvidia-nvtx==13.0.85 ; sys_platform == 'linux'
+    # via cuda-toolkit
+omegaconf==2.3.0
+    # via
+    #   dlio-benchmark
+    #   hydra-core
+opt-einsum==3.4.0
+    # via tensorflow
+optree==0.19.1
+    # via keras
+packaging==26.2
+    # via
+    #   mlpstorage (pyproject.toml)
+    #   hydra-core
+    #   keras
+    #   tensorboard
+    #   tensorflow
+    #   wheel
+pandas==3.0.3
+    # via dlio-benchmark
+pillow==12.2.0
+    # via
+    #   dlio-benchmark
+    #   tensorboard
+protobuf==7.34.1
+    # via
+    #   tensorboard
+    #   tensorflow
+psutil==7.2.2
+    # via
+    #   mlpstorage (pyproject.toml)
+    #   dlio-benchmark
+pyarrow==24.0.0
+    # via
+    #   mlpstorage (pyproject.toml)
+    #   dlio-benchmark
+pycparser==3.0 ; implementation_name != 'PyPy'
+    # via cffi
+pycryptodome==3.23.0
+    # via minio
+pydftracer==2.0.2
+    # via dlio-benchmark
+pygments==2.20.0
+    # via rich
+python-dateutil==2.9.0.post0
+    # via pandas
+python-dotenv==1.2.2
+    # via mlpstorage (pyproject.toml)
+pyyaml==6.0.3
+    # via
+    #   mlpstorage (pyproject.toml)
+    #   dlio-benchmark
+    #   omegaconf
+requests==2.34.0
+    # via tensorflow
+rich==15.0.0
+    # via
+    #   mlpstorage (pyproject.toml)
+    #   keras
+s3dlio==0.9.98
+    # via
+    #   mlpstorage (pyproject.toml)
+    #   dlio-benchmark
+s3torchconnector==1.5.0
+    # via mlpstorage (pyproject.toml)
+s3torchconnectorclient==1.5.0
+    # via s3torchconnector
+setuptools==81.0.0
+    # via
+    #   tensorboard
+    #   tensorflow
+    #   torch
+six==1.17.0
+    # via
+    #   astunparse
+    #   google-pasta
+    #   python-dateutil
+    #   tensorflow
+sympy==1.14.0
+    # via torch
+tensorboard==2.20.0
+    # via tensorflow
+tensorboard-data-server==0.7.2
+    # via tensorboard
+tensorflow==2.20.0
+    # via dlio-benchmark
+termcolor==3.3.0
+    # via tensorflow
+torch==2.11.0
+    # via
+    #   dlio-benchmark
+    #   s3torchconnector
+triton==3.6.0 ; sys_platform == 'linux'
+    # via torch
+typing-extensions==4.15.0
+    # via
+    #   dlio-benchmark
+    #   grpcio
+    #   minio
+    #   optree
+    #   tensorflow
+    #   torch
+tzdata==2026.2 ; sys_platform == 'emscripten' or sys_platform == 'win32'
+    # via pandas
+urllib3==2.7.0
+    # via
+    #   minio
+    #   requests
+werkzeug==3.1.8
+    # via tensorboard
+wheel==0.47.0
+    # via astunparse
+wrapt==2.1.2
+    # via tensorflow
+zstandard==0.25.0
+    # via dgen-py
diff --git a/tests/unit/test_cli.py b/tests/unit/test_cli.py
index 236a2f5b..43f5206d 100755
--- a/tests/unit/test_cli.py
+++ b/tests/unit/test_cli.py
@@ -722,3 +722,90 @@ def test_skips_none_values(self, tmp_path):
         result = apply_yaml_config_overrides(args)
         assert result.debug is True  # Should not be overwritten
         assert result.loops == 5
+
+class TestParseArgumentsStorageFlagConsolidation:
+    """Regression tests for issue #367.
+
+    The CLI parser must not crash when a subcommand that doesn't define
+    --file / --object (reports, history, lockfile) is invoked, and must
+    still correctly consolidate those flags into data_access_protocol on
+    benchmark subcommands that do define them (training, checkpointing,
+    vectordb, kvcache).
+    """
+
+    @staticmethod
+    def _run(monkeypatch, argv):
+        """Invoke parse_arguments() with a synthetic sys.argv."""
+        from mlpstorage_py.cli_parser import parse_arguments
+        monkeypatch.setattr(sys, "argv", argv)
+        return parse_arguments()
+
+    # --- non-benchmark subcommands: must not raise AttributeError ---
+
+    def test_reportgen_does_not_crash_without_storage_flags(self, monkeypatch, tmp_path):
+        """Regression test for #367: `reports reportgen` must parse cleanly."""
+        args = self._run(
+            monkeypatch,
+            ["mlpstorage", "reports", "reportgen", "--results-dir", str(tmp_path)],
+        )
+        assert args.program == "reports"
+        assert args.command == "reportgen"
+        assert not hasattr(args, "file")
+        assert not hasattr(args, "object")
+
+    def test_history_does_not_crash_without_storage_flags(self, monkeypatch):
+        """`history show` must parse cleanly (no --file/--object on this parser)."""
+        args = self._run(monkeypatch, ["mlpstorage", "history", "show"])
+        assert args.program == "history"
+        assert args.command == "show"
+        assert not hasattr(args, "file")
+        assert not hasattr(args, "object")
+
+    def test_lockfile_does_not_crash_without_storage_flags(self, monkeypatch):
+        """`lockfile generate` must parse cleanly (no --file/--object on this parser)."""
+        args = self._run(monkeypatch, ["mlpstorage", "lockfile", "generate"])
+        assert args.program == "lockfile"
+        assert not hasattr(args, "file")
+        assert not hasattr(args, "object")
+
+    # --- benchmark subcommands: existing consolidation must still work ---
+
+    def test_training_run_consolidates_file_flag(self, monkeypatch, tmp_path):
+        """`training run --file` should set data_access_protocol='file'."""
+        args = self._run(
+            monkeypatch,
+            [
+                "mlpstorage", "training", "run",
+                "--model", "unet3d",
+                "--hosts", "localhost",
+                "--num-accelerators", "1",
+                "--accelerator-type", "h100",
+                "--client-host-memory-in-gb", "64",
+                "--data-dir", str(tmp_path / "data"),
+                "--results-dir", str(tmp_path / "results"),
+                "--file",
+            ],
+        )
+        assert args.data_access_protocol == "file"
+        assert not hasattr(args, "file")
+        assert not hasattr(args, "object")
+
+    def test_training_run_consolidates_object_flag(self, monkeypatch, tmp_path):
+        """`training run --object s3` should set data_access_protocol='s3'."""
+        args = self._run(
+            monkeypatch,
+            [
+                "mlpstorage", "training", "run",
+                "--model", "unet3d",
+                "--hosts", "localhost",
+                "--num-accelerators", "1",
+                "--accelerator-type", "h100",
+                "--client-host-memory-in-gb", "64",
+                "--data-dir", str(tmp_path / "data"),
+                "--results-dir", str(tmp_path / "results"),
+                "--object", "s3",
+            ],
+        )
+        assert args.data_access_protocol == "s3"
+        assert not hasattr(args, "file")
+        assert not hasattr(args, "object")

From b54b6526082050bf666250be2675522e0ac80682 Mon Sep 17 00:00:00 2001
From: Devasena Inupakutika <devasena.i@samsung.com>
Date: Tue, 12 May 2026 04:11:06 +0000
Subject: [PATCH 2/2] Remove unwanted requirements.txt

---
 requirements.txt | 251 -----------------------------------------------
 1 file changed, 251 deletions(-)
 delete mode 100644 requirements.txt

diff --git a/requirements.txt b/requirements.txt
deleted file mode 100644
index b132b220..00000000
--- a/requirements.txt
+++ /dev/null
@@ -1,251 +0,0 @@
-# This file was autogenerated by uv via the following command:
-#    uv pip compile pyproject.toml -o requirements.txt --universal
-absl-py==2.4.0
-    # via
-    #   keras
-    #   tensorboard
-    #   tensorflow
-antlr4-python3-runtime==4.9.3
-    # via
-    #   hydra-core
-    #   omegaconf
-argon2-cffi==25.1.0
-    # via minio
-argon2-cffi-bindings==25.1.0
-    # via argon2-cffi
-astunparse==1.6.3
-    # via tensorflow
-certifi==2026.4.22
-    # via
-    #   minio
-    #   requests
-cffi==2.0.0
-    # via argon2-cffi-bindings
-charset-normalizer==3.4.7
-    # via requests
-cuda-bindings==13.2.0 ; sys_platform == 'linux'
-    # via torch
-cuda-pathfinder==1.5.4 ; sys_platform == 'linux'
-    # via cuda-bindings
-cuda-toolkit==13.0.2 ; sys_platform == 'linux'
-    # via torch
-dgen-py==0.2.4
-    # via dlio-benchmark
-dlio-benchmark @ git+https://github.com/russfellows/dlio_benchmark.git@842fb9b0bd9d26c773433b4d0805922040206b50
-    # via mlpstorage (pyproject.toml)
-filelock==3.29.0
-    # via torch
-flatbuffers==25.12.19
-    # via tensorflow
-fsspec==2026.4.0
-    # via torch
-gast==0.7.0
-    # via tensorflow
-google-pasta==0.2.0
-    # via tensorflow
-grpcio==1.80.0
-    # via
-    #   tensorboard
-    #   tensorflow
-h5py==3.16.0
-    # via
-    #   dlio-benchmark
-    #   keras
-    #   tensorflow
-hydra-core==1.3.2
-    # via dlio-benchmark
-idna==3.14
-    # via requests
-jinja2==3.1.6
-    # via torch
-keras==3.14.1
-    # via tensorflow
-libclang==18.1.1
-    # via tensorflow
-markdown==3.10.2
-    # via tensorboard
-markdown-it-py==4.2.0
-    # via rich
-markupsafe==3.0.3
-    # via
-    #   jinja2
-    #   werkzeug
-mdurl==0.1.2
-    # via markdown-it-py
-minio==7.2.20
-    # via mlpstorage (pyproject.toml)
-ml-dtypes==0.5.4
-    # via
-    #   keras
-    #   tensorflow
-mpi4py==4.1.1
-    # via dlio-benchmark
-mpmath==1.3.0
-    # via sympy
-namex==0.1.0
-    # via keras
-networkx==3.6.1
-    # via torch
-numpy==2.4.4
-    # via
-    #   dlio-benchmark
-    #   h5py
-    #   keras
-    #   ml-dtypes
-    #   pandas
-    #   s3dlio
-    #   tensorboard
-    #   tensorflow
-nvidia-cublas==13.1.0.3 ; sys_platform == 'linux'
-    # via
-    #   cuda-toolkit
-    #   nvidia-cudnn-cu13
-    #   nvidia-cusolver
-nvidia-cuda-cupti==13.0.85 ; sys_platform == 'linux'
-    # via cuda-toolkit
-nvidia-cuda-nvrtc==13.0.88 ; sys_platform == 'linux'
-    # via cuda-toolkit
-nvidia-cuda-runtime==13.0.96 ; sys_platform == 'linux'
-    # via cuda-toolkit
-nvidia-cudnn-cu13==9.19.0.56 ; sys_platform == 'linux'
-    # via torch
-nvidia-cufft==12.0.0.61 ; sys_platform == 'linux'
-    # via cuda-toolkit
-nvidia-cufile==1.15.1.6 ; sys_platform == 'linux'
-    # via cuda-toolkit
-nvidia-curand==10.4.0.35 ; sys_platform == 'linux'
-    # via cuda-toolkit
-nvidia-cusolver==12.0.4.66 ; sys_platform == 'linux'
-    # via cuda-toolkit
-nvidia-cusparse==12.6.3.3 ; sys_platform == 'linux'
-    # via
-    #   cuda-toolkit
-    #   nvidia-cusolver
-nvidia-cusparselt-cu13==0.8.0 ; sys_platform == 'linux'
-    # via torch
-nvidia-nccl-cu13==2.28.9 ; sys_platform == 'linux'
-    # via torch
-nvidia-nvjitlink==13.0.88 ; sys_platform == 'linux'
-    # via
-    #   cuda-toolkit
-    #   nvidia-cufft
-    #   nvidia-cusolver
-    #   nvidia-cusparse
-nvidia-nvshmem-cu13==3.4.5 ; sys_platform == 'linux'
-    # via torch
-nvidia-nvtx==13.0.85 ; sys_platform == 'linux'
-    # via cuda-toolkit
-omegaconf==2.3.0
-    # via
-    #   dlio-benchmark
-    #   hydra-core
-opt-einsum==3.4.0
-    # via tensorflow
-optree==0.19.1
-    # via keras
-packaging==26.2
-    # via
-    #   mlpstorage (pyproject.toml)
-    #   hydra-core
-    #   keras
-    #   tensorboard
-    #   tensorflow
-    #   wheel
-pandas==3.0.3
-    # via dlio-benchmark
-pillow==12.2.0
-    # via
-    #   dlio-benchmark
-    #   tensorboard
-protobuf==7.34.1
-    # via
-    #   tensorboard
-    #   tensorflow
-psutil==7.2.2
-    # via
-    #   mlpstorage (pyproject.toml)
-    #   dlio-benchmark
-pyarrow==24.0.0
-    # via
-    #   mlpstorage (pyproject.toml)
-    #   dlio-benchmark
-pycparser==3.0 ; implementation_name != 'PyPy'
-    # via cffi
-pycryptodome==3.23.0
-    # via minio
-pydftracer==2.0.2
-    # via dlio-benchmark
-pygments==2.20.0
-    # via rich
-python-dateutil==2.9.0.post0
-    # via pandas
-python-dotenv==1.2.2
-    # via mlpstorage (pyproject.toml)
-pyyaml==6.0.3
-    # via
-    #   mlpstorage (pyproject.toml)
-    #   dlio-benchmark
-    #   omegaconf
-requests==2.34.0
-    # via tensorflow
-rich==15.0.0
-    # via
-    #   mlpstorage (pyproject.toml)
-    #   keras
-s3dlio==0.9.98
-    # via
-    #   mlpstorage (pyproject.toml)
-    #   dlio-benchmark
-s3torchconnector==1.5.0
-    # via mlpstorage (pyproject.toml)
-s3torchconnectorclient==1.5.0
-    # via s3torchconnector
-setuptools==81.0.0
-    # via
-    #   tensorboard
-    #   tensorflow
-    #   torch
-six==1.17.0
-    # via
-    #   astunparse
-    #   google-pasta
-    #   python-dateutil
-    #   tensorflow
-sympy==1.14.0
-    # via torch
-tensorboard==2.20.0
-    # via tensorflow
-tensorboard-data-server==0.7.2
-    # via tensorboard
-tensorflow==2.20.0
-    # via dlio-benchmark
-termcolor==3.3.0
-    # via tensorflow
-torch==2.11.0
-    # via
-    #   dlio-benchmark
-    #   s3torchconnector
-triton==3.6.0 ; sys_platform == 'linux'
-    # via torch
-typing-extensions==4.15.0
-    # via
-    #   dlio-benchmark
-    #   grpcio
-    #   minio
-    #   optree
-    #   tensorflow
-    #   torch
-tzdata==2026.2 ; sys_platform == 'emscripten' or sys_platform == 'win32'
-    # via pandas
-urllib3==2.7.0
-    # via
-    #   minio
-    #   requests
-werkzeug==3.1.8
-    # via tensorboard
-wheel==0.47.0
-    # via astunparse
-wrapt==2.1.2
-    # via tensorflow
-zstandard==0.25.0
-    # via dgen-py