In [1]:
import xarray as xr
import glob
import os

In [2]:
'''
This function opens every file matching each file pattern and returns a list
(one entry per file pattern) of lists (one opened dataset per matching file).

file_pattern must be a list.
Example:

LIST file_pattern = ['b.e11.B1850C5CN.f09_g16.005.cam.h0.FLNT.*.nc',
                'b.e11.B1850C5CN.f09_g16.005.cam.h0.FSNT.*.nc']
would open all of these files and output a list like this
(each entry is the opened dataset for that file):
variables = [['...FLNT.001.nc', '...FLNT.002.nc', '...FLNT.X.nc'],
             ['...FSNT.001.nc', '...FSNT.002.nc', '...FSNT.X.nc']]
'''
def Set_Variables(file_pattern):
    '''
    Open every file matching each glob pattern and group the datasets by pattern.

    Input:
    LIST file_pattern = glob patterns, one per variable.

    Output:
    LIST of LISTs. The outer list has one entry per pattern, in the order given;
    each inner list holds one dataset per matching file, ordered by sorted file
    path. A pattern that matches nothing yields an empty inner list.
    '''
    variables = []
    for pattern in file_pattern:
        # glob.glob gives no ordering guarantee. Sorting keeps index i pointing
        # at the same file for every variable, which the callers rely on when
        # they pair variables[k][i] across variables.
        file_paths = sorted(glob.glob(pattern))
        temp_var = []
        for file_path in file_paths:
            # Open the dataset
            ds = xr.open_dataset(file_path)
            temp_var.append(ds)
            # Closed right after being stored, as before.
            # NOTE(review): later reads of these datasets presumably rely on
            # xarray re-opening the file on demand -- confirm.
            ds.close()
        variables.append(temp_var)
    return variables

In [8]:
#Make a new file name. This only works for B1850C5CN name convention.
def NewFile(prefix, var, number):
    '''
    Build a file name of the form "<prefix>.<var>.<number>.nc".

    Input:
    prefix = leading part of the name (model/case identifier).
    var = variable name placed after the prefix.
    number = trailing field; callers pass a date-range string or "*" for a glob pattern.

    Output:
    STRING file name.
    '''
    return f"{prefix}.{var}.{number}.nc"

# !!! The two functions below could be improved. Currently the `variables` list is not taken as an argument by default; it is read from the notebook's global scope. !!!
'''
This applies to both Cal_FluxT and Cal_FluxS.

Input:
LIST variables = the nested list of datasets, read from the notebook-level `variables`. Use Set_Variables for the correct format.
INTEGER number = index selecting the file (time period). For example, 0 would select the first file of each variable.

Output:
xarray dataset
'''
# Calculate the top-of-atmosphere (TOA) flux sum.
def Cal_FluxT(number, datasets=None):
    '''
    Build a dataset containing NetFluxT = FSNT + FLNT for one file index.

    Input:
    INTEGER number = index of the file to use within each variable's list.
    LIST datasets (optional) = nested list in the format returned by Set_Variables.
        Entries 0 and 1 are merged and must together provide FSNT and FLNT.
        When omitted, the notebook-level `variables` list is used (the original behaviour).

    Output:
    xarray dataset: the merged dataset with all of its data variables dropped,
    plus the single new data variable NetFluxT.
    '''
    # Fall back to the global list so existing calls Cal_FluxT(number) keep working.
    source = variables if datasets is None else datasets
    # Merge order ([1] first, then [0]) is kept exactly as before.
    Derived = xr.merge([source[1][number], source[0][number]])
    # Drop every data variable so only the skeleton of the merged dataset remains.
    skeleton_ds = Derived.drop_vars(list(Derived.data_vars))
    new_ds = skeleton_ds.copy(deep=True)
    # NOTE(review): this is a plain sum of the two fields; confirm that it matches
    # the sign conventions of FSNT and FLNT before treating it as a net flux.
    new_ds['NetFluxT'] = Derived.FSNT + Derived.FLNT
    return new_ds

# Calculate the surface flux sum.
def Cal_FluxS(number, datasets=None):
    '''
    Build a dataset containing NetFluxS, the sum of FLDS, FLNS, FSDS, FSNS,
    LHFLX and SHFLX, for one file index.

    Input:
    INTEGER number = index of the file to use within each variable's list.
    LIST datasets (optional) = nested list in the format returned by Set_Variables.
        Entries 0 through 5 are merged and must together provide the six
        variables named above. When omitted, the notebook-level `variables`
        list is used (the original behaviour).

    Output:
    xarray dataset: the merged dataset with all of its data variables dropped,
    plus the single new data variable NetFluxS.
    '''
    # Fall back to the global list so existing calls Cal_FluxS(number) keep working.
    source = variables if datasets is None else datasets
    # Merge entries 0..5 in ascending order, exactly as before.
    Derived = xr.merge([source[k][number] for k in range(6)])
    # Drop every data variable so only the skeleton of the merged dataset remains.
    skeleton_ds = Derived.drop_vars(list(Derived.data_vars))
    new_ds = skeleton_ds.copy(deep=True)
    # NOTE(review): this is a plain sum of all six fields; confirm that it matches
    # the intended definition and sign conventions before treating it as a net flux.
    new_ds['NetFluxS'] = Derived.FLDS + Derived.FLNS + Derived.FSDS + Derived.FSNS + Derived.LHFLX + Derived.SHFLX
    return new_ds

In [5]:
'''
These two functions produce the correctly formatted number for the B1850C5CN naming convention.
When to use them?
Example: b.e11.B1850C5CN.f09_g16.005.cam.h0.NewVariable.[NUMBER].nc
Date_List formats the correct [NUMBER].

End_Number is a helper used by Date_List; you do not need to call it directly.

Date_List is the function to call.
Input:
INTEGER i = The file location. It should be the same as the one used for (number) in Cal_FluxT or Cal_FluxS.
INTEGER max = the end placement. This is necessary if adjustments at the end are needed.
INTEGER offset = the starting placement.
INTEGER adjustamount = how much adjustment needs to be made at the end.
                0 for B compset, 100 for F compset, 200 for E compset.
BOOLEAN adjust = Do you need an adjustment at the beginning? True/False
'''
def End_Number(x):
    '''
    Return the end stamp that belongs to start stamp x.

    Adds 9000, 900, 10 and 1 to x in that order (9911 in total).
    '''
    return x + 9000 + 900 + 10 + 1

def Date_List(i, max, offset, adjust, adjustamount):
    '''
    Format the "<start>-<end>" number field of a file name.

    Input:
    INTEGER i = the placement (file index).
    INTEGER max = the end placement; adjustamount is added to the end stamp when i equals it.
    INTEGER offset = the starting placement, added to i before scaling.
    BOOLEAN adjust = whether the start stamp of the first file (i == 0) gets 100 added.
    INTEGER adjustamount = amount added to the end stamp of the last file.

    Output:
    STRING with both stamps zero-padded to at least six digits and joined by "-".
    '''
    start_stamp = (i + offset) * (10**4) + 1
    # The end stamp is derived from the start stamp before any start adjustment.
    end_stamp = End_Number(start_stamp)
    # adjustment at the end
    if i == max:
        end_stamp = end_stamp + adjustamount
    # adjustment at the beginning, if needed
    if adjust == True and i == 0:
        start_stamp = start_stamp + 100
    return f"{start_stamp:06d}-{end_stamp:06d}"

In [13]:
'''
output_folder = where the output files are stored
models = which models (file-name prefixes) are processed
factors = variables used for the surface fluxes
factort = variables used for the TOA fluxes
'''
output_folder = '/data/cristi/a/cristi/data/LENS/LE_control/NetFlux'
models = ['b.e11.B1850C5CN.f09_g16.005.cam.h0', 'e.e11.E1850C5CN.f09_g16.001.cam.h0', 'f.e11.F1850C5CN.f09_f09.001.cam.h0']
factors = ["FLDS", "FLNS", "FSDS", "FSNS", "LHFLX", "SHFLX"]
factort = ["FSNT", "FLNT"]

# ==========
# compsets
# ==========
# One entry per model, in the same order as `models`:
# (number of files, Date_List offset, adjust the first start stamp?, end adjustment)
# The file count is no longer stored in a variable called `max`, which would
# shadow the builtin for the rest of the kernel session.
compset_settings = [
    (18, 4, False, 0),    # B compset
    (9, 1, True, 200),    # E compset
    (26, 0, True, 100),   # F compset
]

# One entry per derived product: (output variable name, input variables, calculator).
# NetFluxS is processed before NetFluxT for every compset, as before.
flux_products = [
    ("NetFluxS", factors, Cal_FluxS),
    ("NetFluxT", factort, Cal_FluxT),
]

# Make sure the destination exists before any file is written.
os.makedirs(output_folder, exist_ok=True)

for model, (n_files, offset, adjust, adjustamount) in zip(models, compset_settings):
    for flux_name, factor_names, calc_flux in flux_products:
        # === Processing ===
        file_pattern = [NewFile(model, factor, "*") for factor in factor_names]
        # Cal_FluxS / Cal_FluxT read the notebook-level `variables`, so it must
        # be rebound here before either of them is called.
        variables = Set_Variables(file_pattern)
        for i in range(n_files):
            fixed_number = Date_List(i, n_files - 1, offset, adjust, adjustamount)
            file_path = NewFile(model, flux_name, fixed_number)
            output_path = os.path.join(output_folder, file_path)
            # Existing outputs are left untouched so the cell can be re-run.
            if not os.path.exists(output_path):
                Derived = calc_flux(i)
                Derived.to_netcdf(output_path)