In [59]:
import os
from datetime import datetime, timedelta
import re
import os
import numpy as np
import rasterio
from rasterio.enums import Resampling


def generate_expected_filenames(start_date, end_date):
    """Return the daily raster file names expected for a date range.

    One name of the form ``africa_arc_YYYY.MM.DD.tif`` is produced for each
    day from ``start_date`` up to and including ``end_date``.

    Args:
        start_date: First day of the range (a ``datetime``).
        end_date: Last day of the range, inclusive (a ``datetime``).

    Returns:
        list[str]: File names in chronological order; empty when
        ``end_date`` is earlier than ``start_date``.
    """
    one_day = timedelta(days=1)
    names = []
    day = start_date

    while day <= end_date:
        stamp = day.strftime('%Y.%m.%d')
        names.append('africa_arc_{}.tif'.format(stamp))
        day = day + one_day

    return names

def find_missing_files(folder_path, expected_filenames):
    """List the expected file names that are absent from a folder.

    Args:
        folder_path: Directory whose entries are compared against the
            expected names.
        expected_filenames: Iterable of names whose third
            underscore-separated field contains a ``YYYY.MM.DD`` date.

    Returns:
        list[str]: Names from ``expected_filenames`` not present in
        ``folder_path``, ordered by the date embedded in each name.
    """
    def _embedded_date(filename):
        # The date lives in the third "_"-separated field of the name.
        date_field = filename.split('_')[2]
        stamp = re.search(r'(\d{4}\.\d{2}\.\d{2})', date_field).group(1)
        return datetime.strptime(stamp, "%Y.%m.%d")

    on_disk = os.listdir(folder_path)
    absent = set(expected_filenames) - set(on_disk)
    return sorted(absent, key=_embedded_date)



def aggregate_monthly_tifs(input_folder, output_folder, missing_files):
    """Write a stand-in raster for every missing daily file.

    Despite its name, this function does no aggregation: for each entry of
    ``missing_files`` it looks up the ``.tif`` in ``input_folder`` whose key
    equals the missing date's ``MM.DD`` and copies that raster's first band
    and profile to ``output_folder`` as ``africa_arc_YYYY.MM.DD.tif``.

    A source file's key is the first ``<digits>.<digits>`` run found in the
    last underscore-separated field of its name.

    Args:
        input_folder: Directory holding the source ``.tif`` files.
        output_folder: Directory that receives the generated files; it is
            created (including parents) when absent.
        missing_files: Iterable of file names whose last
            underscore-separated field contains a ``YYYY.MM.DD`` date.

    Notes:
        * Source names are indexed once and only the matching raster is
          opened, instead of reading every raster for every missing date.
        * ``.tif`` files whose name carries no ``<digits>.<digits>`` key are
          ignored rather than aborting the run.
        * If several source files share a key, the one listed last by
          ``os.listdir`` is used (the same file whose data ended up in the
          output when each match overwrote the previous one).
        * A missing date with no matching source is skipped silently.
    """
    os.makedirs(output_folder, exist_ok=True)

    # Build the key -> file name index once; it does not depend on the
    # missing date being processed.
    source_by_key = {}
    for tif_file in os.listdir(input_folder):
        if not tif_file.endswith('.tif'):
            continue
        key_match = re.search(r'(\d+\.\d+)', tif_file.split('_')[-1])
        if key_match is None:
            continue
        source_by_key[key_match.group(1)] = tif_file

    for missing_file in missing_files:
        name_missing = re.search(
            r'(\d{4}\.\d{2}\.\d{2})', missing_file.split('_')[-1]
        ).group(1)
        date_missing = datetime.strptime(name_missing, "%Y.%m.%d")
        missing_key = date_missing.strftime("%m.%d")
        print(name_missing)

        tif_file = source_by_key.get(missing_key)
        if tif_file is None:
            continue
        print(missing_key)

        source_path = os.path.join(input_folder, tif_file)
        output_tif_path = os.path.join(output_folder, f'africa_arc_{name_missing}.tif')

        # One open yields both the pixel data and the profile reused for
        # the output file.
        with rasterio.open(source_path) as src:
            # NOTE(review): no out_shape is passed, so the resampling
            # argument presumably has no effect here — confirm before
            # removing it.
            data = src.read(1, resampling=Resampling.average)
            profile = src.profile

        with rasterio.open(output_tif_path, 'w', **profile) as dst:
            dst.write(data, 1)

    print('Action is completed')







In [60]:
def main(
    start_date=datetime(2001, 1, 1),
    end_date=datetime(2022, 4, 28),
    folder_path=r'/efs/Incomati/Precipitation/Arc/daily_1',
    input_folder=r"/efs/Incomati/Precipitation/Arc/daily_avg",
    output_folder=r"/efs/Incomati/Precipitation/Arc/Monthly_3",
):
    """Find the missing daily rasters and generate stand-ins for them.

    Args:
        start_date: First day of the period to check.
        end_date: Last day of the period to check, inclusive.
        folder_path: Directory expected to hold one raster per day.
        input_folder: Directory with the source rasters passed to
            ``aggregate_monthly_tifs``.
        output_folder: Directory where the generated rasters are written.

    Returns:
        list[str]: The missing file names, ordered by date. The value is
        returned so the caller's ``missing_files = main()`` receives the
        list instead of ``None``.
    """
    expected_filenames = generate_expected_filenames(start_date, end_date)
    missing_files = find_missing_files(folder_path, expected_filenames)

    if missing_files:
        # The individual dates are printed by aggregate_monthly_tifs below.
        print('Missing files:')
    else:
        print('No missing files found.')

    aggregate_monthly_tifs(input_folder, output_folder, missing_files)
    return missing_files



if __name__ == "__main__":
    missing_files = main()

Missing files:
2001.01.09
01.09
2001.02.27
02.27
2003.12.31
12.31
2004.01.26
01.26
2004.01.27
01.27
2005.04.18
04.18
2009.11.15
11.15
2009.11.16
11.16
2011.12.18
12.18
2014.01.06
01.06
2014.01.07
01.07
2014.01.08
01.08
2014.01.09
01.09
2014.01.10
01.10
2014.01.11
01.11
2014.01.12
01.12
2014.01.13
01.13
2014.01.14
01.14
2014.01.15
01.15
2014.01.16
01.16
2014.01.17
01.17
2014.01.18
01.18
2014.01.19
01.19
2014.01.20
01.20
2014.01.21
01.21
2014.01.22
01.22
2014.01.23
01.23
2014.01.24
01.24
2014.01.25
01.25
2014.01.26
01.26
2014.01.27
01.27
2014.01.28
01.28
2014.01.29
01.29
2014.01.30
01.30
2014.01.31
01.31
2014.02.01
02.01
2014.02.02
02.02
2014.02.03
02.03
2014.02.04
02.04
2014.02.05
02.05
2014.02.06
02.06
2014.02.07
02.07
2014.02.08
02.08
2014.02.09
02.09
2014.02.10
02.10
2014.02.11
02.11
2014.02.12
02.12
2014.04.28
04.28
2014.04.29
04.29
2014.04.30
04.30
2014.05.01
05.01
2014.05.02
05.02
2014.05.03
05.03
2014.05.04
05.04
2014.05.05
05.05
2014.05.06
05.06
2014.05.07
05.07
2014.05.08
05.08