In [1]:
import xarray as xr
import os
import glob

In [5]:
# Yearly precipitation indices: open every raw NetCDF file as one dataset,
# reduce each calendar year to per-grid-cell summary statistics, and write
# one file per year (pr_year_<year>.nc) into output_folder.
input_folder = "./raw/pr"
output_folder = "./processed/pr"
os.makedirs(input_folder, exist_ok=True)
os.makedirs(output_folder, exist_ok=True)

# Sorted so the list of inputs is deterministic from run to run.
nc_file_list = sorted(glob.glob(os.path.join(input_folder, "*.nc")))

if nc_file_list:
    # The context manager closes the underlying files even when processing a
    # year raises part-way through the loop.
    with xr.open_mfdataset(nc_file_list, combine='by_coords') as combined_ds:
        # Group on the calendar year derived from the 'time' coordinate.
        for year, yearly_ds in combined_ds.groupby('time.year'):
            daily_pr = yearly_ds['pr']

            # Maximum over 'time' of every variable in the dataset; the 'pr'
            # variable of the output is therefore the yearly maximum value.
            augmented_yearly_data = yearly_ds.max(dim='time', keep_attrs=True)

            # Number of time steps in the year whose value exceeds a threshold.
            # NOTE(review): thresholds 20 and 5 presumably assume 'pr' is in
            # mm/day -- confirm against the input files' units.
            heavy_rainfall = (daily_pr > 20).sum(dim='time', keep_attrs=True)
            wet_days = (daily_pr > 5).sum(dim='time', keep_attrs=True)

            # Total over all time steps of the year.
            yearly_sum = daily_pr.sum(dim='time', keep_attrs=True)

            # Largest running total over 5 and 3 consecutive time steps.
            # min_periods equals the window length, so incomplete windows at
            # the start of the slice are NaN. The rolling window is applied to
            # this year's slice only, so no window spans two calendar years.
            rolling_5day_sum = daily_pr.rolling(time=5, min_periods=5).sum()
            max_5day_sum = rolling_5day_sum.max(dim='time', keep_attrs=True)

            rolling_3day_sum = daily_pr.rolling(time=3, min_periods=3).sum()
            max_3day_sum = rolling_3day_sum.max(dim='time', keep_attrs=True)

            augmented_yearly_data['max_5day_sum'] = max_5day_sum
            augmented_yearly_data['max_3day_sum'] = max_3day_sum
            augmented_yearly_data['sum'] = yearly_sum
            augmented_yearly_data['over_20'] = heavy_rainfall
            augmented_yearly_data['wet_days'] = wet_days

            # Record which year this file describes. Without it the per-year
            # files carry no year label, and concatenating them later along
            # 'year' yields a dimension with no coordinate values.
            augmented_yearly_data = augmented_yearly_data.assign_coords(year=int(year))

            out_path = os.path.join(
                output_folder,
                f"pr_year_{year}.nc"
            )
            augmented_yearly_data.to_netcdf(out_path)
else:
    # Make the no-op visible instead of silently producing no output files.
    print(f"No NetCDF files found in {input_folder!r}; nothing to process.")



In [6]:
# Merge the per-year files (pr_year_<year>.nc) into a single dataset with a
# labelled 'year' dimension and save it as merged.nc.
#
# Only the per-year files are matched. A bare "*.nc" pattern would also pick
# up merged.nc and rolling_values.nc once they exist in output_folder, so the
# cell would fail (or merge its own output) on any re-run of the notebook.
yearly_pattern = os.path.join(output_folder, "pr_year_*.nc")


def _year_of(path):
    """Return the integer year encoded in a ``pr_year_<year>.nc`` file name.

    Returns ``None`` when the part after ``pr_year_`` is not purely digits,
    so stray files that happen to match the glob pattern can be skipped.
    """
    stem = os.path.splitext(os.path.basename(path))[0]
    suffix = stem[len("pr_year_"):]
    return int(suffix) if suffix.isdigit() else None


# glob returns paths in arbitrary order and combine='nested' concatenates in
# list order, so sort numerically by year to keep the 'year' axis chronological.
nc_files = sorted(
    (path for path in glob.glob(yearly_pattern) if _year_of(path) is not None),
    key=_year_of,
)
if not nc_files:
    raise FileNotFoundError(
        f"No per-year files matching {yearly_pattern!r}; "
        "run the yearly aggregation cell first."
    )

# Open and concatenate all per-year datasets along a new 'year' dimension.
merged_ds = xr.open_mfdataset(nc_files, combine='nested', concat_dim='year')

# Label the 'year' dimension with the years taken from the file names, in the
# same order as the files were concatenated.
merged_ds = merged_ds.assign_coords(year=[_year_of(path) for path in nc_files])

# Save the merged dataset to a new file.
merged_ds.to_netcdf(os.path.join(output_folder, "merged.nc"))

In [7]:
# Smooth every yearly index with a centred 5-year running mean and store the
# smoothed fields together in rolling_values.nc.
merged_path = os.path.join(output_folder, "merged.nc")
merged_pr = xr.open_dataset(merged_path)

# Window settings shared by all variables: 5 entries along 'year', centred on
# the current entry; min_periods=1 means the ends of the series are averaged
# over however many entries are available instead of becoming NaN.
window_kwargs = dict(year=5, center=True, min_periods=1)

rolling_pr_sum = merged_pr['sum'].rolling(**window_kwargs).mean()
rolling_over_20 = merged_pr['over_20'].rolling(**window_kwargs).mean()
rolling_max_3day_sum = merged_pr['max_3day_sum'].rolling(**window_kwargs).mean()
rolling_max_5day_sum = merged_pr['max_5day_sum'].rolling(**window_kwargs).mean()
rolling_pr = merged_pr['pr'].rolling(**window_kwargs).mean()
rolling_wet_days = merged_pr['wet_days'].rolling(**window_kwargs).mean()

# Collect the smoothed fields under "<name>_rolling" variable names.
rolling_values = xr.Dataset(
    dict(
        sum_rolling=rolling_pr_sum,
        over_20_rolling=rolling_over_20,
        max_3day_sum_rolling=rolling_max_3day_sum,
        max_5day_sum_rolling=rolling_max_5day_sum,
        pr_rolling=rolling_pr,
        wet_days_rolling=rolling_wet_days,
    )
)

out_rolling_path = os.path.join(output_folder, "rolling_values.nc")
rolling_values.to_netcdf(out_rolling_path)