From 5f13741aa3a37cc020a02bb01ebfcbfd72cab4cb Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Tue, 2 Dec 2025 22:10:41 -0800 Subject: [PATCH] Use pattern matches for original source log file: YYYYMMDDHHMM_YYYYMMDDHHMM.nc4 --- .vscode/launch.json | 4 ++-- src/data/process.py | 17 +++++++++++------ 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/.vscode/launch.json b/.vscode/launch.json index db045e5..c3f3aa1 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -358,9 +358,9 @@ // Plankitvore deployment for CeNCOOS Syncro - whole month of April 2025 //"args": ["-v", "1", "--auv_name", "ahi", "--start", "20250401T000000", "--end", "20250502T000000", "--noinput", "--num_cores", "1", "--no_cleanup"] // Fails with ValueError: different number of dimensions on data and dims: 2 vs 1 for wetlabsubat_digitized_raw_ad_counts variable - "args": ["-v", "1", "--log_file", "pontus/missionlogs/2025/20250604_20250616/20250608T020852/202506080209_202506081934.nc4", "--no_cleanup", "--clobber"] + //"args": ["-v", "1", "--log_file", "pontus/missionlogs/2025/20250604_20250616/20250608T020852/202506080209_202506081934.nc4", "--no_cleanup", "--clobber"] // Full month of June 2025 for Pontus with WetLabsUBAT Group data - //"args": ["-v", "1", "--auv_name", "pontus", "--start", "20250601T000000", "--end", "20250702T000000", "--noinput", "--num_cores", "1", "--no_cleanup"] + "args": ["-v", "1", "--auv_name", "pontus", "--start", "20250601T000000", "--end", "20250721T000000", "--noinput", "--num_cores", "1", "--no_cleanup"] //"args": ["-v", "1", "--auv_name", "pontus", "--start", "20250601T000000", "--end", "20250702T000000", "--noinput", "--num_cores", "1", "--no_cleanup", "--clobber"] //"args": ["-v", "1", "--log_file", "pontus/missionlogs/2025/20250623_20250707/20250707T043011/slate.nc4", "--no_cleanup"] }, diff --git a/src/data/process.py b/src/data/process.py index 2227b76..d2d3376 100755 --- a/src/data/process.py +++ b/src/data/process.py @@ -308,6 +308,8 @@ 
def _find_log_files_in_datetime_dir( self, datetime_dir: Path, start_dt: datetime, end_dt: datetime ) -> list: """Find log files in a datetime directory if it's in range.""" + import re + log_files = [] # Normalize and parse directory datetime @@ -321,12 +323,15 @@ def _find_log_files_in_datetime_dir( # Check if directory datetime is in range if start_dt <= dir_dt <= end_dt: - # Look for main log file (*.nc4 file) - nc4_files = list(datetime_dir.glob("*.nc4")) - if nc4_files: - relative_path = str(nc4_files[0].relative_to(Path(self.vehicle_dir))) - log_files.append(relative_path) - self.logger.debug("Found log file: %s", relative_path) + # Look for main log file (*.nc4 file) but exclude _combined.nc4 and _align.nc4 + # Pattern matches: YYYYMMDDHHMM_YYYYMMDDHHMM.nc4 + # Example: 202506072219_202506072336.nc4 + log_pattern = re.compile(r"^\d{12}_\d{12}\.nc4$") + for nc4_file in datetime_dir.glob("*.nc4"): + if log_pattern.match(nc4_file.name): + relative_path = str(nc4_file.relative_to(Path(self.vehicle_dir))) + log_files.append(relative_path) + self.logger.debug("Found log file: %s", relative_path) return log_files