In [98]:
import xarray as xr
import pandas as pd
import numpy as np
import cftime
import glob
import os
from IPython.display import clear_output
# location of this script: /glade/work/sglanvil/CCR/yeager/fix_daily_files.ipynb

# Repair daily time-series files whose time axis has missing days.
#
# For every start year / ensemble member, files for ``ivar`` found in the
# member's ``badLength/`` directory are checked; those with fewer than
# EXPECTED_TIME_LENGTH time steps are interpolated onto a gap-free daily
# no-leap axis and written one directory up, under the same file name.
ivar = "PRECTMX"
baseDir = "/glade/campaign/cesm/development/espwg/CESM2-DP/timeseries/"

# Files with fewer time steps than this are treated as incomplete.
# 2982 equals the no-leap day count of a 20011031-20091231 file
# (62 days in 2001 + 8 * 365).
EXPECTED_TIME_LENGTH = 2982


def _noleap_daily_axis(time_vals):
    """Return a gap-free daily no-leap axis spanning *time_vals*.

    Args:
        time_vals: sequence of date objects exposing ``strftime`` (the decoded
            values of the file's ``time`` coordinate).

    Returns:
        numpy array of ``cftime.DatetimeNoLeap`` at 00:00, one per calendar
        day from the earliest to the latest input stamp, without Feb 29.
    """
    # Round-trip through strings so pandas can build the daily range.
    time_strings = [x.strftime('%Y-%m-%d %H:%M:%S') for x in time_vals]
    current_time_range = pd.to_datetime(time_strings)
    full_time_range = pd.date_range(start=current_time_range.min(),
                                    end=current_time_range.max(), freq="D")
    # pandas uses the standard calendar; drop leap days to match no-leap.
    is_leap_day = (full_time_range.month == 2) & (full_time_range.day == 29)
    full_time_range = full_time_range[~is_leap_day]
    return np.array([cftime.DatetimeNoLeap(x.year, x.month, x.day)
                     for x in full_time_range])


def _repeat_first_stamp(stamp_var, n_times):
    """Return an 'S8' DataArray on ``time`` repeating the first entry of *stamp_var*.

    Args:
        stamp_var: DataArray with a ``time`` dimension (``date_written`` or
            ``time_written``).
        n_times: length of the new time axis.
    """
    repeated = np.repeat(stamp_var.isel(time=0).values, n_times)
    return xr.DataArray(repeated, dims="time").astype('S8')


for iyear in range(1990, 2000):
    for imbr in range(11, 31):
        member_str = f"{imbr:03d}"
        out_dir = f"{baseDir}b.e21.BSMYLE.f09_g17.{iyear}-11.{member_str}/atm/proc/tseries/day_1/"
        member_dir = f"{out_dir}badLength/"
        nc_files = glob.glob(os.path.join(member_dir, f"*.{ivar}.*.nc"))
        clear_output(wait=True)  # Clears the current output
        print(f"Processing: {iyear} {imbr}")
        for nc_file in nc_files:
            # The context manager closes the file handle for every file,
            # including the ones that turn out to need no fixing.
            with xr.open_dataset(nc_file) as ds:
                if len(ds['time']) >= EXPECTED_TIME_LENGTH:
                    continue
                print("need to fix time")
                print(nc_file)
                fileOut = f"{out_dir}{os.path.basename(nc_file)}"

                time_units = ds['time'].encoding.get('units')
                if time_units is None:
                    raise ValueError(
                        f"no 'units' in the time encoding of {nc_file}; "
                        "cannot rebuild time_bnds"
                    )

                full_time_range_cftime = _noleap_daily_axis(ds['time'].values)
                n_times = len(full_time_range_cftime)

                # time_bnds is rebuilt from scratch below, so it is removed
                # before interpolating.
                ds_trimmed = ds.drop_vars('time_bnds', errors='ignore')
                date_written_extended = _repeat_first_stamp(ds_trimmed['date_written'], n_times)
                time_written_extended = _repeat_first_stamp(ds_trimmed['time_written'], n_times)

                ds_filled = ds_trimmed.interp(time=full_time_range_cftime)

                # Use the complete units string (the same one written to the
                # time coordinate below) so the numeric bounds and the encoded
                # time values share one reference date.
                time_numeric = cftime.date2num(full_time_range_cftime, time_units)
                # Each interval ends at its stamp and starts one time unit
                # earlier (one day if the units are "days since ..." -- TODO confirm).
                time_bnds = np.column_stack((time_numeric - 1, time_numeric))
                ds_filled['time_bnds'] = (('time', 'nbnd'), time_bnds)
                ds_filled['date_written'] = date_written_extended
                ds_filled['time_written'] = time_written_extended

                ds_filled['time_bnds'].attrs['long_name'] = "time interval endpoints"
                ds_filled['time'].encoding['units'] = str(time_units)
                ds_filled[ivar] = ds_filled[ivar].astype(np.float32)
                ds_filled['time_bnds'] = ds_filled['time_bnds'].astype(np.double)
                ds_filled.to_netcdf(fileOut)



Processing: 1999 30
need to fix time
/glade/campaign/cesm/development/espwg/CESM2-DP/timeseries/b.e21.BSMYLE.f09_g17.1999-11.030/atm/proc/tseries/day_1/badLength/b.e21.BSMYLE.f09_g17.1999-11.030.cam.h1.PRECTMX.20011031-20091231.nc
