parameterlab · cemde · Nov 23, 2025 · Nov 21, 2025 · Nov 23, 2025 · Nov 23, 2025
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -11,6 +11,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ### Changed
 
+- `FileResultLogger` now accepts a `pathlib.Path` for the `output_dir` argument and has a new `overwrite` argument (default `False`) that prevents overwriting existing log files.
+
 ### Fixed
 
 ### Removed

diff --git a/maseval/core/callbacks/result_logger.py b/maseval/core/callbacks/result_logger.py
@@ -297,10 +297,11 @@ class FileResultLogger(ResultLogger):
 
     def __init__(
         self,
-        output_dir: str = "./results",
+        output_dir: Path | str = "./results",
         filename_pattern: str = "benchmark_{timestamp}.jsonl",
         write_metadata: bool = True,
         atomic_writes: bool = True,
+        overwrite: bool = False,
         include_traces: bool = True,
         include_config: bool = True,
         include_eval: bool = True,
@@ -309,11 +310,14 @@ def __init__(
         """Initialize the file logger.
 
         Args:
-            output_dir: Directory where result files will be written (created if needed)
+            output_dir: Directory where result files will be written (created if needed).
+                Accepts either a Path object or a string path.
             filename_pattern: Pattern for result filename. Use {timestamp} for
                 automatic timestamp insertion (format: YYYYMMDD_HHMMSS)
             write_metadata: If True, write a metadata file alongside results
             atomic_writes: If True, use atomic writes (write to temp, then rename)
+            overwrite: If True, overwrite an existing output file. If False (default),
+                raise FileExistsError when the output file already exists.
             include_traces: If True, include execution traces in logged results
             include_config: If True, include configuration in logged results
             include_eval: If True, include evaluation results in logged results
@@ -330,6 +334,7 @@ def __init__(
         self.filename_pattern = filename_pattern
         self.write_metadata = write_metadata
         self.atomic_writes = atomic_writes
+        self.overwrite = overwrite
 
         # Runtime state
         self._output_path: Optional[Path] = None
@@ -447,6 +452,7 @@ def _initialize_output_file(self) -> None:
 
         Raises:
             IOError: If file or directory creation fails
+            FileExistsError: If output file exists and overwrite is False
         """
         # Create output directory
         self.output_dir.mkdir(parents=True, exist_ok=True)
@@ -458,6 +464,10 @@ def _initialize_output_file(self) -> None:
         filename = self.filename_pattern.replace("{timestamp}", self._timestamp)
         self._output_path = self.output_dir / filename
 
+        # Check if file exists and handle overwrite
+        if self._output_path.exists() and not self.overwrite:
+            raise FileExistsError(f"Output file already exists: {self._output_path}. Set overwrite=True to allow overwriting existing files.")
+
         # Open file for writing
         self._file_handle = open(self._output_path, "w")
 

diff --git a/tests/test_core/test_callbacks/test_file_result_logger.py b/tests/test_core/test_callbacks/test_file_result_logger.py
@@ -7,6 +7,7 @@
 """
 
 import json
+from pathlib import Path
 
 import pytest
 
@@ -63,3 +64,114 @@ def test_file_result_logger_writes_jsonl(tmp_path):
     assert obj["task_id"] == report["task_id"]
     assert obj["repeat_idx"] == report["repeat_idx"]
     assert "traces" in obj and "config" in obj and "eval" in obj
+
+
+@pytest.mark.core
+def test_file_result_logger_accepts_pathlib_path(tmp_path):
+    """Test that FileResultLogger accepts pathlib.Path for output_dir.
+
+    Verifies that the logger works correctly when output_dir is specified
+    as a Path object instead of a string.
+    """
+    out_dir = tmp_path / "results"
+    out_dir.mkdir()
+
+    # Pass Path object directly instead of string
+    logger = FileResultLogger(output_dir=out_dir, filename_pattern="test_results.jsonl")
+
+    benchmark = MockBenchmark(n_tasks=1, n_repeats=1)
+    logger.on_run_start(benchmark)  # type: ignore[arg-type]
+
+    report = {
+        "task_id": benchmark.task_ids[0],
+        "repeat_idx": 0,
+        "traces": {"agent": "trace"},
+        "config": {"model": "gpt"},
+        "eval": {"score": 1.0},
+    }
+    logger.on_task_repeat_end(benchmark, report)  # type: ignore[arg-type]
+    logger.on_run_end(benchmark, [report])  # type: ignore[arg-type]
+
+    # Verify file was created
+    out_file = out_dir / "test_results.jsonl"
+    assert out_file.exists()
+    assert isinstance(logger.output_dir, Path)
+
+    lines = out_file.read_text().strip().splitlines()
+    assert len(lines) == 1
+
+
+@pytest.mark.core
+def test_file_result_logger_overwrite_false_prevents_overwriting(tmp_path):
+    """Test that FileResultLogger raises error when file exists and overwrite=False.
+
+    Verifies that when overwrite is False (default), attempting to write to
+    an existing file raises FileExistsError.
+    """
+    out_dir = tmp_path / "results"
+    out_dir.mkdir()
+
+    # Create an existing file
+    existing_file = out_dir / "test_results.jsonl"
+    existing_file.write_text("existing content\n")
+
+    # Create logger with overwrite=False (the default); construction itself is not expected to fail
+    logger = FileResultLogger(output_dir=out_dir, filename_pattern="test_results.jsonl", overwrite=False)
+
+    benchmark = MockBenchmark(n_tasks=1, n_repeats=1)
+    logger.on_run_start(benchmark)  # type: ignore[arg-type]
+
+    report = {
+        "task_id": benchmark.task_ids[0],
+        "repeat_idx": 0,
+        "traces": {"agent": "trace"},
+        "config": {"model": "gpt"},
+        "eval": {"score": 1.0},
+    }
+
+    # FileExistsError is expected when the first result is logged (not in on_run_start above)
+    with pytest.raises(FileExistsError, match="Output file already exists.*Set overwrite=True"):
+        logger.on_task_repeat_end(benchmark, report)  # type: ignore[arg-type]
+
+    # Verify original file is unchanged
+    assert existing_file.read_text() == "existing content\n"
+
+
+@pytest.mark.core
+def test_file_result_logger_overwrite_true_allows_overwriting(tmp_path):
+    """Test that FileResultLogger overwrites existing file when overwrite=True.
+
+    Verifies that when overwrite is True, the logger successfully overwrites
+    an existing file with the same name.
+    """
+    out_dir = tmp_path / "results"
+    out_dir.mkdir()
+
+    # Create an existing file
+    existing_file = out_dir / "test_results.jsonl"
+    existing_file.write_text("existing content\n")
+
+    # Create logger with overwrite=True
+    logger = FileResultLogger(output_dir=out_dir, filename_pattern="test_results.jsonl", overwrite=True)
+
+    benchmark = MockBenchmark(n_tasks=1, n_repeats=1)
+    logger.on_run_start(benchmark)  # type: ignore[arg-type]
+
+    report = {
+        "task_id": benchmark.task_ids[0],
+        "repeat_idx": 0,
+        "traces": {"agent": "trace"},
+        "config": {"model": "gpt"},
+        "eval": {"score": 1.0},
+    }
+    logger.on_task_repeat_end(benchmark, report)  # type: ignore[arg-type]
+    logger.on_run_end(benchmark, [report])  # type: ignore[arg-type]
+
+    # Verify file was overwritten with new content
+    lines = existing_file.read_text().strip().splitlines()
+    assert len(lines) == 1
+    assert "existing content" not in existing_file.read_text()
+
+    obj = json.loads(lines[0])
+    assert obj["task_id"] == report["task_id"]
+    assert obj["repeat_idx"] == report["repeat_idx"]