Skip to content

Commit

Permalink
Add a date_idx option to group_by_date (#29)
Browse files Browse the repository at this point in the history
  • Loading branch information
scottstanie committed Feb 14, 2024
1 parent 5710c0e commit 8884888
Show file tree
Hide file tree
Showing 2 changed files with 88 additions and 4 deletions.
19 changes: 15 additions & 4 deletions src/opera_utils/_dates.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,17 +144,24 @@ def _date_format_to_regex(date_format: str) -> re.Pattern:


def group_by_date(
files: Iterable[PathLikeT], file_date_fmt: str = DATE_FORMAT
files: Iterable[PathLikeT],
file_date_fmt: str = DATE_FORMAT,
date_idx: int | None = None,
) -> dict[tuple[datetime.datetime, ...], list[PathLikeT]]:
"""Combine files by date into a dict.
Parameters
----------
files: Iterable[Filename]
files : Iterable[Filename]
Paths to the files to group, each containing one or more dates in the filename.
file_date_fmt: str
file_date_fmt : str
Format of the date in the filename.
Default is `DATE_FORMAT`.
date_idx : int, optional
If provided, uses only this index of the dates found in each filename.
For example, if `file_date_fmt='%Y%m%d'`, and the files have pairs of
these date strings but you only wish to group by the first, use
`date_idx=0`.
Returns
-------
Expand Down Expand Up @@ -182,7 +189,11 @@ def group_by_date(
for dates, g in itertools.groupby(
files, key=lambda x: tuple(get_dates(x, fmt=file_date_fmt))
):
grouped_images[dates].extend(list(g))
if date_idx is None:
key = dates
else:
key = (dates[date_idx],)
grouped_images[key].extend(list(g))
return grouped_images


Expand Down
73 changes: 73 additions & 0 deletions tests/test_dates.py
Original file line number Diff line number Diff line change
Expand Up @@ -252,3 +252,76 @@ def test_sort_by_date_different_fmt():
)
assert sorted_files == expected_files
assert sorted_dates == expected_dates


def test_group_by_date():
    """Single-date filenames are grouped under a 1-tuple key of that date."""
    filenames = [
        "slc_20180101.tif",
        "slc_1_20190101.tif",
        "slc_2_20190101.tif",
        "slc_20210101.tif",
    ]
    date_2018 = datetime.datetime(2018, 1, 1)
    date_2019 = datetime.datetime(2019, 1, 1)
    date_2021 = datetime.datetime(2021, 1, 1)

    # The two 2019 files share a date, so they must land in the same group.
    want = {
        (date_2018,): ["slc_20180101.tif"],
        (date_2019,): ["slc_1_20190101.tif", "slc_2_20190101.tif"],
        (date_2021,): ["slc_20210101.tif"],
    }
    assert want == _dates.group_by_date(filenames)


def test_group_by_date_with_idx():
    """Two-date filenames are grouped by both dates, or by one via `date_idx`."""
    pair_files = [
        "slc_20170101_20180101.tif",
        "slc_20170101_20190101.tif",
        "slc_20170101_20210101.tif",
    ]
    # Every file shares the first date; the second date is unique per file.
    first_date = datetime.datetime(2017, 1, 1)
    second_dates = [datetime.datetime(year, 1, 1) for year in (2018, 2019, 2021)]

    # Without `date_idx`, the key is the full tuple of dates in the filename.
    by_both_dates = {
        (first_date, second): [name]
        for second, name in zip(second_dates, pair_files)
    }
    assert by_both_dates == _dates.group_by_date(pair_files)

    # `date_idx=1` keys on the second date only: still one file per group.
    by_second_date = {
        (second,): [name] for second, name in zip(second_dates, pair_files)
    }
    assert by_second_date == _dates.group_by_date(pair_files, date_idx=1)

    # `date_idx=0` keys on the shared first date: all files collapse together.
    by_first_date = {(first_date,): list(pair_files)}
    assert by_first_date == _dates.group_by_date(pair_files, date_idx=0)

0 comments on commit 8884888

Please sign in to comment.