In [1]:
import os
import numpy as np
import rasterio
from rasterio.enums import Resampling
from datetime import datetime
import re

# Matches the YYYY.MM.DD stamp embedded in the input file names.
_DATE_PATTERN = re.compile(r'(\d{4}\.\d{2}\.\d{2})')


def _parse_file_date(filename):
    """Return the date found in the last '_'-separated part of *filename*.

    Returns None when that part holds no parseable YYYY.MM.DD stamp.
    """
    match = _DATE_PATTERN.search(filename.split('_')[-1])
    if match is None:
        return None
    try:
        return datetime.strptime(match.group(1), "%Y.%m.%d")
    except ValueError:
        # Digits matched the pattern but are not a real calendar date.
        return None


def aggregate_monthly_tifs(input_folder, output_folder):
    """Average band 1 of dated GeoTIFFs per calendar day (month.day) across years.

    Despite the name, the grouping key is "%m.%d": every file whose name
    carries the same month and day, whatever the year, is averaged into one
    raster written to ``output_folder`` as ``africa_arc_<MM.DD>.tif``.

    Args:
        input_folder: Directory scanned (non-recursively) for ``.tif`` files
            whose last ``_``-separated name part contains ``YYYY.MM.DD``.
        output_folder: Directory for the averaged rasters; created if missing.

    Returns:
        None. For each output the month.day key and the number of contributing
        files are printed, followed by a completion message.

    Notes:
        * ``.tif`` files without a parseable date are reported and skipped
          instead of aborting the whole run.
        * Sums are accumulated in float64 so integer or float32 inputs neither
          overflow nor lose precision. The output keeps the template's dtype
          when it is floating point and is promoted to float32 otherwise, so
          fractional averages are not truncated.
        * Pixels are averaged as plain numbers; nodata values are not masked.
          NOTE(review): confirm whether the inputs carry a nodata value that
          should be excluded from the mean.
    """
    os.makedirs(output_folder, exist_ok=True)

    # Parse every file name exactly once, keeping (date, name) pairs.
    dated_files = []
    for name in os.listdir(input_folder):
        if not name.endswith('.tif'):
            continue
        file_date = _parse_file_date(name)
        if file_date is None:
            print(f'Skipping {name}: no YYYY.MM.DD date in file name')
            continue
        dated_files.append((file_date, name))
    # Chronological order fixes both the template file (earliest date) and
    # the order in which the month.day keys are first seen and later written.
    dated_files.sort(key=lambda item: item[0])

    daily_sum = {}
    daily_count = {}
    profile = None

    for file_date, name in dated_files:
        with rasterio.open(os.path.join(input_folder, name)) as src:
            if profile is None:
                # Metadata of the earliest file is the template for all outputs.
                profile = src.profile
            # No resampling argument: without an out_shape the read returns
            # the native grid, so a resampling method would have no effect.
            data = src.read(1).astype(np.float64)

        daily_key = file_date.strftime("%m.%d")
        if daily_key in daily_sum:
            daily_sum[daily_key] += data
            daily_count[daily_key] += 1
        else:
            daily_sum[daily_key] = data
            daily_count[daily_key] = 1

    out_dtype = None
    if profile is not None:
        if not np.issubdtype(np.dtype(profile['dtype']), np.floating):
            # An integer template cannot hold a fractional mean.
            profile.update(dtype='float32')
        out_dtype = profile['dtype']

    # Write one averaged raster per month.day key.
    for daily_key, accumulated in daily_sum.items():
        daily_average_data = accumulated / daily_count[daily_key]
        print(daily_key)
        print(daily_count[daily_key])

        output_tif_path = os.path.join(output_folder, f'africa_arc_{daily_key}.tif')
        with rasterio.open(output_tif_path, 'w', **profile) as dst:
            dst.write(daily_average_data.astype(out_dtype), 1)
    print('Action is completed')

if __name__ == "__main__":
    # Source directory of dated rasters and destination for the averages.
    # NOTE(review): "dalily_2" looks like a misspelling of "daily_2" — confirm
    # the intended directory name before changing it.
    input_folder, output_folder = (
        r"/efs/Incomati/Precipitation/Arc/daily_1",
        r"/efs/Incomati/Precipitation/Arc/dalily_2",
    )

    aggregate_monthly_tifs(input_folder=input_folder, output_folder=output_folder)


01.01
22
01.02
22
01.03
22
01.04
22
01.05
21
01.06
20
01.07
21
01.08
21
01.10
21
01.11
21
01.12
21
01.13
21
01.14
21
01.15
21
01.16
21
01.17
21
01.18
21
01.19
21
01.20
21
01.21
21
01.22
21
01.23
21
01.24
21
01.25
21
01.26
20
01.27
20
01.28
21
01.29
21
01.30
21
01.31
21
02.01
21
02.02
21
02.03
21
02.04
21
02.05
21
02.06
21
02.07
21
02.08
21
02.09
21
02.10
21
02.11
21
02.12
21
02.13
22
02.14
22
02.15
22
02.16
22
02.17
22
02.18
22
02.19
22
02.20
21
02.21
22
02.22
22
02.23
22
02.24
22
02.25
22
02.26
22
02.28
22
03.01
22
03.02
22
03.03
22
03.04
22
03.05
22
03.06
22
03.07
22
03.08
22
03.09
22
03.10
22
03.11
22
03.12
22
03.13
22
03.14
22
03.15
22
03.16
22
03.17
22
03.18
22
03.19
22
03.20
22
03.21
22
03.22
22
03.23
22
03.24
22
03.25
22
03.26
22
03.27
22
03.28
22
03.29
22
03.30
22
03.31
22
04.01
21
04.02
22
04.03
21
04.04
22
04.05
21
04.06
22
04.07
22
04.08
22
04.09
22
04.10
22
04.11
22
04.12
22
04.13
22
04.14
22
04.15
22
04.16
22
04.17
22
04.18
21
04.19
22
04.20
22
04.21
22
04.22
22
04.23
22
0