In [1]:
"""
This script organizes radar data files into manageable text file chunks.

- Input:
  Looks for radar data files (.h5) inside the specified base directory:
      /scratch_ssd2/stefanelli/3DHNN_18_06_2025/0_DATASETS_FROM_ARSO_AND_UNZIPPED/RADAR_unzipped/LISCA/{radar}/
  where {radar} is defined by the variable `radar` (e.g., 'PAZZ41' or 'PAZZ42').

- Processing:
  Iterates over years 2019–2023 and months 01–12.
  For each year–month combination, collects and sorts all files matching:
      T_{radar}_C_LJLM_{YYYYMM}*.h5
  Splits the file list into chunks of `chunk_size` (default 100 files per chunk).

- Output:
  For each chunk, writes the file paths (one per line) into a separate text file:
      0_FILES_DIR/{radar}/{YYYYMM}_part_{NNN}.txt
  The output directory is created, relative to the current working directory,
  if it does not already exist.

- Purpose:
  This helps in batch processing large radar datasets by breaking down 
  file lists into smaller, more manageable subsets.

AUTHOR: Marco Stefanelli
"""


from pathlib import Path
import glob

# --- Configuration -----------------------------------------------------------
# Root directory that holds one sub-directory of .h5 files per radar.
files_dir = Path('/scratch_ssd2/stefanelli/3DHNN_18_06_2025/0_DATASETS_FROM_ARSO_AND_UNZIPPED/RADAR_unzipped/LISCA/')
radar = 'PAZZ41'  # or PAZZ42
chunk_size = 100  # maximum number of file paths per output list

# Chunk lists are written relative to the current working directory.
output_dir = Path(f'0_FILES_DIR/{radar}')
output_dir.mkdir(parents=True, exist_ok=True)


def write_chunk_lists(filenames, out_dir, prefix, max_per_file):
    """Write *filenames* into numbered list files of at most *max_per_file* paths.

    Files are named ``{prefix}_part_{NNN}.txt`` (NNN starts at 001) inside
    *out_dir*, which must already exist. Every path is written on its own
    newline-terminated line, so line-oriented tools (``wc -l``, shell
    ``while read`` loops) see the last entry of each list as well.

    Args:
        filenames: Sequence of path strings, already in the desired order.
        out_dir: Directory that receives the list files.
        prefix: File-name prefix, e.g. the ``YYYYMM`` string.
        max_per_file: Maximum number of paths per list file (must be >= 1).

    Returns:
        The list of ``Path`` objects written, in order; empty when
        *filenames* is empty.

    Raises:
        ValueError: If *max_per_file* is smaller than 1.
    """
    if max_per_file < 1:
        raise ValueError(
            f"chunk size must be a positive integer, got {max_per_file!r}"
        )

    written = []
    for part, start in enumerate(range(0, len(filenames), max_per_file), start=1):
        chunk = filenames[start:start + max_per_file]
        out_file = Path(out_dir) / f"{prefix}_part_{part:03}.txt"

        with out_file.open('w') as f:
            # Terminate every line (including the last) with a newline.
            f.writelines(f"{path}\n" for path in chunk)
        written.append(out_file)
    return written


print('\n-------------------------------------------------------------------\n', flush=True)

for year in range(2019, 2024):  # 2023 inclusive
    for month in range(1, 13):  # 1–12
        ym = f"{year}{month:02}"
        print(ym)

        # Sorted list of this month's files; the file name embeds YYYYMM
        # right after the fixed "T_{radar}_C_LJLM_" prefix.
        pattern = str(files_dir / radar / f"T_{radar}_C_LJLM_{ym}*.h5")
        filenames = sorted(glob.glob(pattern))
        n_files = len(filenames)
        print(n_files)

        # A month without matching files produces no list files.
        write_chunk_lists(filenames, output_dir, ym, chunk_size)

print("DONE!!!")


-------------------------------------------------------------------

201901
4460
201902
4032
201903
5115
201904
8610
201905
8739
201906
8362
201907
8846
201908
8801
201909
8599
201910
8711
201911
7877
201912
8883
202001
8873
202002
8323
202003
8877
202004
8610
202005
8809
202006
7579
202007
8778
202008
8755
202009
8543
202010
8792
202011
8572
202012
7794
202101
8036
202102
8019
202103
8879
202104
8608
202105
7090
202106
8493
202107
8835
202108
8824
202109
6728
202110
7704
202111
8600
202112
8866
202201
8598
202202
7749
202203
8861
202204
8591
202205
8881
202206
8087
202207
8881
202208
8897
202209
8575
202210
8877
202211
8594
202212
8889
202301
8620
202302
8011
202303
8885
202304
8594
202305
8887
202306
8606
202307
8871
202308
8845
202309
8510
202310
8862
202311
8556
202312
8895
DONE!!!
