In [4]:
import datetime
import os
import boto3
from botocore import UNSIGNED
from botocore.config import Config
import xarray as xr
import s3fs
from dask import delayed

In [5]:
def download_CMI(yyyymmddhhmn, band, path_local, path_dest, product_name):
    """Locate one GOES-16 file on the public S3 bucket and download it if needed.

    Args:
        yyyymmddhhmn: Timestamp string in '%Y%m%d%H%M' format.
        band: Band number (anything ``int()`` accepts); zero-padded to two
            digits in the object key.
        path_local: Directory that receives the downloaded file.
        path_dest: Directory that is also checked for an existing copy.
        product_name: Product identifier used in the key, e.g. 'ABI-L2-CMIPF'.

    Returns:
        The file name without extension when an object was found (whether it
        was downloaded or already present in either directory), ``-1`` when
        the listing has no 'Contents' entry, or ``None`` when 'Contents' is
        empty.
    """
    for directory in (path_local, path_dest):
        os.makedirs(directory, exist_ok=True)

    # Parse the timestamp once; every key component derives from it.
    scan_start = datetime.datetime.strptime(yyyymmddhhmn, '%Y%m%d%H%M')
    year = scan_start.strftime('%Y')
    day_of_year = scan_start.strftime('%j')
    hour = scan_start.strftime('%H')
    minute = scan_start.strftime('%M')

    # Public bucket, accessed anonymously (unsigned requests).
    bucket_name = 'noaa-goes16'
    s3_client = boto3.client('s3', config=Config(signature_version=UNSIGNED))

    # Key layout: <product>/<year>/<day-of-year>/<hour>/OR_<product>-M6C<band>_G16_s<start...>
    # NOTE(review): the scan-mode token 'M6' is hard-coded; objects written
    # under a different mode token will not match this prefix.
    folder = f'{product_name}/{year}/{day_of_year}/{hour}'
    prefix = f'{folder}/OR_{product_name}-M6C{int(band):02.0f}_G16_s{year}{day_of_year}{hour}{minute}'

    s3_result = s3_client.list_objects_v2(Bucket=bucket_name, Prefix=prefix, Delimiter="/")

    if 'Contents' not in s3_result:
        print(f'No files found for the date: {yyyymmddhhmn}, Band-{band}')
        return -1

    # Only the first listed object is used: every branch below returns.
    for entry in s3_result['Contents']:
        key = entry['Key']
        # Base name of the key, cut at the first dot
        file_name = key.rpartition('/')[2].partition('.')[0]

        local_file_path = f'{path_local}/{file_name}.nc'
        dest_file_path = f'{path_dest}/{file_name}.nc'

        if os.path.exists(local_file_path):
            print(f'File already exists in {path_local}: {local_file_path}')
        elif os.path.exists(dest_file_path):
            print(f'File already exists in {path_dest}: {dest_file_path}')
        else:
            print(f'Downloading file to {path_local}: {local_file_path}')
            s3_client.download_file(bucket_name, key, local_file_path)
        return file_name

    return None

In [6]:
import os
import xarray as xr
from dask import delayed

def process_and_downscale_cmi(path_local, path_dest, date_save, time_save, product_name, Band):
    """Download one GOES file, crop it, coarsen it and save the result.

    The raw file is fetched into ``path_local`` via ``download_CMI``, cropped
    to a fixed x/y window, block-averaged with an integer factor per axis and
    written to ``path_dest`` under the same file name. The raw file is removed
    once the result has been written.

    Args:
        path_local: Directory holding the temporary raw download.
        path_dest: Directory receiving the downscaled NetCDF file.
        date_save: Date string 'YYYYMMDD'.
        time_save: Time string 'HHMM'; concatenated with ``date_save``.
        product_name: Product identifier passed through to ``download_CMI``.
        Band: Band number passed through to ``download_CMI``.

    Returns:
        None. Returns early (without raising) when no file is available on the
        server, when the output already exists, or when the raw file is
        missing from ``path_local``.
    """
    # Create the necessary yyyymmddhhmn format for downloading
    yyyymmddhhmn = date_save + time_save

    file_name = download_CMI(yyyymmddhhmn, Band, path_local, path_dest, product_name)

    # download_CMI reports "nothing to fetch" with -1 (or None for an empty
    # listing); building paths from those sentinels would be meaningless.
    if not isinstance(file_name, str):
        print(f"No file available for {yyyymmddhhmn}, Band-{Band}. Skipping.")
        return

    # Check if the file already exists in the destination path
    output_path = os.path.join(path_dest, f'{file_name}.nc')
    if os.path.exists(output_path):
        print(f"File {output_path} already exists. Skipping download and processing.")
        return

    # Path of the raw download; os.path.join works with or without a trailing
    # separator on path_local.
    temp_path = os.path.join(path_local, f'{file_name}.nc')
    if not os.path.exists(temp_path):
        print(f"No file found for {file_name}.")
        return

    with xr.open_dataset(temp_path) as ds:
        # Integer coarsening is chosen so each axis keeps at least roughly
        # this many samples (factor = size // target, never below 1).
        target_x_res = 300
        target_y_res = 300

        # Crop window in the dataset's own x/y coordinates.
        # NOTE(review): presumably fixed-grid scan angles in radians; y is
        # sliced high-to-low, which assumes a descending y coordinate — confirm.
        ds_sel = ds.sel(x=slice(-0.05, 0.07), y=slice(0.09, -0.03))

        scale_factor_x = max(ds_sel.sizes['x'] // target_x_res, 1)
        scale_factor_y = max(ds_sel.sizes['y'] // target_y_res, 1)

        # Block-average; "trim" drops samples that do not fill a whole block.
        downscaled = ds_sel.coarsen(x=scale_factor_x, y=scale_factor_y, boundary="trim").mean()

        # Save the downscaled dataset while the source file is still open
        downscaled.to_netcdf(output_path)
        print(f"Downscaled file saved at {output_path}")

    # After the dataset is closed, the raw download can be deleted safely
    if os.path.exists(temp_path):
        os.remove(temp_path)
        print(f"File {temp_path} has been deleted.")
    else:
        print(f"File {temp_path} does not exist.")


# path_local = 'D:/RODRIGO/IntradayForecasting/content/GOES_Files/CMI/' # Path for temporary files
# path_dest = 'D:/RODRIGO/IntradayForecasting/content/GOES_Files/CMIPF_sliced_resized/'
# date_save = '20240705'
# time_save = '1500'
# product_name = 'ABI-L2-CMIPF'
# Band = 5

# process_and_downscale_cmi(path_local, path_dest, date_save, time_save, product_name, Band)


In [None]:
import datetime

# Function to generate time strings in hourly increments 
def generate_time_strings_daytime():
    """Return hourly 'HHMM' strings '0900' through '2300', followed by '0000'.

    The trailing '0000' stands for 24:00. The strings carry no date, so the
    caller decides which calendar day that last entry belongs to.
    """
    hourly = [f'{hour:02}00' for hour in range(9, 24)]
    return hourly + ['0000']  # (24:00 == 00:00)

def generate_time_strings():
    """Return the 24 hourly 'HHMM' strings from '0000' through '2300'."""
    return [f'{hour:02}00' for hour in range(24)]

# Function to process and downscale GOES data between two dates, at specified times
def process_goes_between_dates(path_local, path_dest, start_date, end_date, product_name, band):
    """Run ``process_and_downscale_cmi`` for every day and hourly slot in a range.

    Args:
        path_local: Directory for temporary raw downloads.
        path_dest: Directory for the downscaled output files.
        start_date: First day to process, 'YYYYMMDD' (inclusive).
        end_date: Last day to process, 'YYYYMMDD' (inclusive).
        product_name: Product identifier passed through to the worker.
        band: Band number passed through to the worker.

    Returns:
        None. A failure in one slot is printed and does not stop the run.

    Raises:
        ValueError: If ``start_date`` or ``end_date`` is not in '%Y%m%d' format.
    """
    # Convert string dates to datetime objects
    start_date_obj = datetime.datetime.strptime(start_date, '%Y%m%d')
    end_date_obj = datetime.datetime.strptime(end_date, '%Y%m%d')

    # Hourly time strings; use generate_time_strings() for all 24 hours instead
    times = generate_time_strings_daytime()

    # Attach a day offset to each slot. The list is in chronological order, so
    # a time earlier than its predecessor (e.g. '0000' after '2300', i.e.
    # 24:00) has crossed midnight and belongs to the following calendar day.
    schedule = []
    day_offset = 0
    previous_time = None
    for time_save in times:
        if previous_time is not None and time_save < previous_time:
            day_offset += 1
        schedule.append((day_offset, time_save))
        previous_time = time_save

    # Iterate over each date between the start and end dates (inclusive)
    current_date_obj = start_date_obj
    while current_date_obj <= end_date_obj:
        for day_offset, time_save in schedule:
            slot_date = current_date_obj + datetime.timedelta(days=day_offset)
            date_save = slot_date.strftime('%Y%m%d')

            # Best effort: report the failure and continue with the next slot
            try:
                print(f"Processing for {date_save} at {time_save}")
                process_and_downscale_cmi(path_local, path_dest, date_save, time_save, product_name, band)
            except Exception as e:
                print(f"Error processing for {date_save} at {time_save}: {e}")

        # Move to the next day
        current_date_obj += datetime.timedelta(days=1)

# Usage: download, crop and downscale one band for every day in the range below.
# NOTE(review): both directories are machine-specific absolute paths — adjust
# them (or read them from configuration) before running on another machine.
# Running this cell starts the full batch immediately.
path_local = 'D:/RODRIGO/IntradayForecasting/content/GOES_Files/CMI/' # Path for temporary files (raw downloads, removed after processing)
path_dest = 'D:/RODRIGO/IntradayForecasting/content/GOES_Files/CMIPF_sliced_resized/'  # Output directory for the downscaled files
start_date = '20240801'  # First day to process (inclusive), 'YYYYMMDD'
end_date = '20240830'    # Last day to process (inclusive), 'YYYYMMDD'
product_name = 'ABI-L2-CMIPF'
band = 2  # Band number inserted into the S3 object key

process_goes_between_dates(path_local, path_dest, start_date, end_date, product_name, band)