### Creating new forcing files for ACCESS-OM2-01
- Maurice F. Huguenin
- m.huguenin-virchaux@unsw.edu.au
- 11th of April 2024

In [None]:
%%time
# Build anomalous RYF forcing files: for every forcing variable and every simulation year,
# write  climatological RYF field + (interpolated Nino3.4 time series * spatial anomaly pattern).
# Relies on names defined in other cells: xr, np, os, cftime, forcing, EN_mean, LN_mean,
# tstart, tend, seconds_in_year.
from datetime import timedelta # used for the leap-day shift of the time axis further down
from scipy.interpolate import interp1d # using scipy here for the interpolation for N34 onto higher resolution
base = '/g/data/qv56/replicas/input4MIPs/CMIP6/OMIP/MRI/MRI-JRA55-do-' + forcing + '/'
var = ['huss','psl','tas', 'uas','vas','prra','prsn','rlds','rsds','friver'] # JRA55-do variable names (ten entries)
var2 = ['huss_10m','psl','tas_10m', 'uas_10m','vas_10m','prrn','prsn','rlds','rsds','friver'] # variable names inside the RYF / spatial pattern files
var3 = ['q_10','slp','t_10', 'u_10','v_10','rain','snow','rlds','rsds','runoff_all'] # filename name variable on /g/data/ua8 for RYF forcing

for e in [1]:#range(2): # loop through the El Niño and La Niña events
    if e == 0:
        suffix = 'ENFull'; save_folder = 'forcing_mean_anoms_ENFull/'
        print('Creating anomalous forcing fields for '+suffix+', 2-yr EXP:')
        ts = EN_mean # the original time series
        n_years = 2  # El Niño experiment: files for years 1 and 2 only
    if e == 1:
        suffix = 'LNFull'; save_folder = 'forcing_mean_anoms_LNFull/'
        print('Creating anomalous forcing fields for '+suffix+', 4-yr EXP:')
        ts = LN_mean # the original time series
        n_years = 4  # La Niña experiment: files for years 1 to 4
    print('----------------------------------------------------------')

    for i in range(len(var)): # loop through all variables (i indexes var, var2 and var3 in lockstep)
        # per-variable source directory, file time stamp and number of time steps per year
        if var[i] in ['huss','psl','tas','ts','uas','vas']: domain = 'atmos/3hrPt/'; timestep = ['199001010000','199012312230']; length_year = 2920
        if var[i] in ['prra','prsn','rlds','rsds']:         domain = 'atmos/3hr/';   timestep = ['199001010130','199101010000']; length_year = 2920
        if var[i] in ['friver']:                            domain = 'ocean/day/';   timestep = ['19900101','19910101'];         length_year = 365

        # load in JRA55-do field, RYF field and copy the latter to get the correct .nc file structure
        # NOTE(review): JRA_field is opened but not used anywhere below in this cell
        JRA_field = xr.open_dataset(base+domain+var[i]+'/gn/v20180412/'+
                        var[i]+'_input4MIPs_atmosphericState_OMIP_MRI-JRA55-do-'+forcing+'_gn_'+timestep[0]+'-'+timestep[1]+'.nc')[var[i]]
        RYF_field = xr.open_dataset('/g/data/ua8/JRA55-do/RYF/v1-3/'+'RYF.'+var3[i]+'.1990_1991.nc')[var2[i]]
        # Two complementary masks (> 27 and < 27) replace every value with 0: this yields an array with the
        # structure of RYF_field that is filled with zeros and then overwritten time step by time step below
        RYF_field_final = RYF_field.where(RYF_field > 27, other=0).where(RYF_field < 27, other=0)

        # spatial anomaly pattern; the daily runoff field has its own pattern file (all other fields are 3-hourly)
        if var[i] != 'friver':
            pattern = xr.open_dataset('/g/data/e14/mv7494/ENSOAnt_input/spatial_patterns/sp_mean_anoms_'+suffix[0:2]+'.nc')[var2[i]].load()
        else:
            pattern = xr.open_dataset('/g/data/e14/mv7494/ENSOAnt_input/spatial_patterns/sp_mean_anoms_'+suffix[0:2]+'_friver.nc')[var2[i]].load()
        pattern_values = pattern.values # loop-invariant, so looked up once instead of every time step
        print(domain+var[i])

        # Matt's part from here
        # --------------------------------- part with leap day correction --------------------------------- #
        # NOTE(review): the arrays built in this section do not feed the forcing fields written below;
        # they are kept unchanged in case other cells read them
        # period = 7 * 365.25 / 365 # years. NB: Need to scale the period to get rid of the leap year calendar issue that was not resolved as at 31/08/21.
        num_of_leap_day_losses = np.floor((tstart - 1900)/4 - (tstart - 1900)/100 + (tstart - 1900)/400) # number of leaps days between tstart and 1900
        zeroYR = tstart - 1900 + num_of_leap_day_losses / 365  # years. determines only when the zero in the time series sinusoid, not the zero that access sees

        # Extract time series and structure from original files:
        timeRYF = RYF_field.time.values
        timeFULL = np.concatenate(np.array([np.array([cftime.DatetimeNoLeap(year,x.month,x.day,x.hour,0,0,0) for x in timeRYF]) for year in np.arange(tstart,tend+1)])).ravel()
        # hmaurice:
        timeFULL = timeFULL + timedelta(days=num_of_leap_day_losses) # shift the full date time array by the number of leap days forward
        timeYR = np.array([(x-cftime.DatetimeNoLeap(1900,1,1,0,0,0,0)).total_seconds()/seconds_in_year for x in timeFULL])

        # interpolate the monthly Nino3.4 time series of the current experiment (ts) onto the temporal
        # resolution of the forcing file: length_year samples for each full 12-month block of ts
        n_months = len(ts)
        N34_JRA_highres = np.interp(np.linspace(1,n_months,length_year*(int(n_months/12))), np.linspace(0,n_months,n_months), ts)
        # ---------------------------------------------------------------------------------------------------- #

        for f in [1,2,3,4]: # loop through the number of years
            if f > n_years:
                break # the 2-year El Niño simulation is finished after year 2; La Niña uses all four years

            # the part of the interpolated time series belonging to year f, e.g. 0:2920, 2920:5840, ...
            time_series = N34_JRA_highres[((f-1)*length_year):(f*length_year)]

            out_file = '/g/data/e14/mv7494/ENSOAnt_input/'+save_folder+'yr'+str(f)+'/'+'RYF.'+var2[i]+'.1990_1991.nc'

            # skip iteration if file already exists
            if os.path.isfile(out_file):
                print('Year '+str(f)+' file finished: RYF.'+var2[i]+'.1990_1991.nc')
                continue

            for t in range(length_year):
                # if t % 1000 == 0: print('time step: '+str(t)) # print every 1000th iteration to see how far the loop is ahead

                # --- Here the key part: climatological forcing + anomalies (time series[t] * spatial pattern[x,y]) --- #
                RYF_field_final[t,:,:] = RYF_field[t,:,:] + (pattern_values*time_series[t])                             #
                # ----------------------------------------------------------------------------------------------------- #

            RYF_field_final.to_netcdf(out_file)
            print('Year '+str(f)+' file finished: RYF.'+var2[i]+'.1990_1991.nc')
        print('--- --- --- --- ---')
# Wall time: 13 s                    for one variable and both experiments
# Wall time: 3min 9s                 for all variables and one experiment
# Wall time: 11min 23s               for all variables and both El Niño (2-year) and La Niña (4-year simulation)