In [None]:
# filter some warning messages
import warnings
warnings.filterwarnings("ignore")

In [None]:
from myfunctions import *

In [None]:
# Model catalogue, one row per model. Later cells read the columns
# 'source_id', 'zstore_siconc' and 'latname' from it.
# NOTE(review): `pd` is not imported explicitly in this notebook; presumably it
# arrives through `from myfunctions import *` -- confirm.
datapd = pd.read_csv(filepath_or_buffer='List57.csv')

In [None]:
from dask.distributed import Client
from dask_jobqueue import SLURMCluster

# Job template for the SLURM-backed Dask cluster: each job runs one worker
# process with 2 cores and 8G of memory, under a nanny, logging errors only.
cluster = SLURMCluster(cores=2, processes=1, memory='8G', nanny=True, silence_logs='error')

# Scale the cluster to 32 (see the dask-jobqueue `scale` documentation for the
# exact worker/job semantics), then connect a client to it.
cluster.scale(32)
client = Client(cluster)

# Bare last expression: shows the client summary as the cell output.
client

In [None]:
# For every model listed in `datapd`: open its September sea-ice concentration
# (siconc), find a matching grid-cell area field, combine both on one lat/lon
# description with create_new_ds, keep the part south of `southlat`, and pickle
# the result under `p_save`. Models whose pickle already exists are skipped.
p_save = '../../SO_data/data_siconc_w_area/'   # directory handed to ispickleexists / savepickle
p_nc = '../../../data/model/CMIP6/'            # root handed to read_siconc / read_areacello

# Loop-invariant settings, hoisted out of the loop body.
selected_month = 9   # September only
southlat = -40       # keep points whose 'newlat' is below this value
coord_tol = 10e-4    # (= 1e-3) largest |difference| for two coordinate arrays to count as equal

for i in range(len(datapd)):
    name = datapd.at[i, 'source_id']
    print("{} {}".format(i, name))

    # check if the resulting file exists
    if ispickleexists(name, p_save):
        print("    [o] data exist.")
        continue

    # Reset the per-model result. Without this, a model that matches none of the
    # branches below would reuse `new_ds` from the previous iteration and save
    # another model's data under its own name.
    new_ds = None

    # open siconc data from nc file or zarr online (september only);
    # a missing 'zstore_siconc' entry means the data is not available online
    if pd.isna(datapd.at[i, 'zstore_siconc']):
        ds = read_siconc(p_nc, name, selected_month)
    else:
        ds = open_from_cloud(datapd.at[i, 'zstore_siconc'])
        ds = select_month(ds, selected_month)

    # Model-specific clean-up of the concentration field (helpers from myfunctions).
    icedata = ds.siconc
    if name in ['E3SM-2-0', 'E3SM-2-0-NARRM']:
        icedata = set_nan_to_zero(icedata)
    if name in ['GISS-E2-1-H', 'GISS-E2-2-H', 'INM-CM4-8']:
        icedata = set_zero_to_nan(icedata)

    # Options forwarded to get_latlon; only a few models override the defaults.
    nolatlon = False
    newlatlon = False
    if name == 'NESM3':
        newlatlon = ('lat', 'lon')

    # lat/lon of the sea-ice grid, taken from the first time step
    dlat, dlon = get_latlon(datapd, i, icedata.isel(time=0), newlatlon, nolatlon)

    # ---- grid-cell area -------------------------------------------------
    # Some siconc datasets already carry the cell area, under 'area' or 'areacello'.
    if 'area' in ds and 'areacello' not in ds:
        ds = ds.rename({'area': 'areacello'})

    if 'areacello' in ds:
        dsg_data = ds.areacello
        if 'time' in dsg_data.dims:
            dsg_data = dsg_data.isel(time=0)
        dsg_data = dsg_data.where(dsg_data > 0)   # non-positive areas become NaN
        # area shares the siconc dataset, so it shares its coordinates
        dlat_g, dlon_g = dlat, dlon
    else:
        # Otherwise read the area from a separate file; two models borrow the
        # area file of a sibling model.
        if name == 'GISS-E2-2-H':
            nameg = 'GISS-E2-1-H'
        elif name == 'UKESM1-1-LL':
            nameg = 'UKESM1-0-LL'
        else:
            nameg = name

        varname = 'areacelli' if name == 'NESM3' else 'areacello'

        dsg = read_areacello(p_nc, nameg, varname)
        dsg_data = dsg[varname]
        dsg_data = dsg_data.where(dsg_data > 0)   # non-positive areas become NaN

        if name == 'CAS-ESM2-0':  # CAS-ESM2-0 cell area grid has different name
            nolatlon = True
            newlatlon = ('lat', 'lon')

        if name in ['FGOALS-g3']:
            # reverse the area grid along j, then relabel j with the siconc j values
            dsg_data = dsg_data.reindex(j=list(reversed(dsg_data.j)))
            dsg_data = dsg_data.assign_coords({'j': icedata.j})

        dlat_g, dlon_g = get_latlon(datapd, i, dsg_data, newlatlon, nolatlon)

    # ---- reconcile grids of different shape -----------------------------
    if dlat.shape != dlat_g.shape:  # if not the same shape
        if name in ['CMCC-CM2-SR5', 'CMCC-ESM2']:
            # trim the first and last j and the last i of the area grid
            dsg_data = dsg_data.isel(j=slice(1, len(dsg_data.j) - 1)).isel(i=slice(0, len(dsg_data.i) - 1))
            dsg_data = copy_xy(icedata, dsg_data)
            dlat_g, dlon_g = get_latlon(datapd, i, dsg_data, newlatlon, nolatlon)
        if name in ['NorESM2-MM', 'NorESM2-LM']:
            # trim the last j of the area grid
            dsg_data = dsg_data.isel(j=slice(0, len(dsg_data.j) - 1))
            dlat_g, dlon_g = get_latlon(datapd, i, dsg_data, newlatlon, nolatlon)

    # ---- combine siconc and area ----------------------------------------
    if dlat.shape == dlat_g.shape:  # if same shape
        lat_diff = np.nanmax(np.abs(dlat.values - dlat_g.values))
        lon_diff = np.nanmax(np.abs(dlon.values - dlon_g.values))
        if (lat_diff < coord_tol) and (lon_diff < coord_tol):
            # both grids agree within tolerance (NaN entries are ignored by nanmax)
            if np.isnan(dlat).any() or np.isnan(dlon).any():  # if coords in ice data is not complete
                if np.isnan(dlat_g).any() or np.isnan(dlon_g).any():  # if coords in area data is not complete
                    newdlon, newdlat = newxy_fmissingxy(dlon_g, dlat_g)
                    ds_siconc = drop_coords(icedata)
                    new_ds = create_new_ds(ds_siconc, dsg_data, newdlat, newdlon)
                else:
                    ds_siconc = copy_xy(dsg_data, icedata)
                    ds_siconc = drop_coords(ds_siconc)
                    new_ds = create_new_ds(ds_siconc, dsg_data, dlat_g, dlon_g)
            else:
                new_ds = create_new_ds(icedata, dsg_data, dlat, dlon)
        else:
            # same shape, but the coordinates differ by more than the tolerance
            if pd.isna(datapd.at[i, 'latname']):
                dsg_data = calculate_area_xy(ds)
                new_ds = create_new_ds(icedata, dsg_data, dlat, dlon)
            else:
                if name in ['CESM2-WACCM-FV2', 'NorESM2-MM', 'NorESM2-LM', 'FGOALS-g3']:
                    ds_siconc = copy_xy(dsg_data, icedata)
                    ds_siconc = drop_coords(ds_siconc)
                    new_ds = create_new_ds(ds_siconc, dsg_data, dlat_g, dlon_g)
                elif name in ['CAS-ESM2-0']:
                    dlonnew, dlatnew = newxy_fmissingxy(dlon, dlat)
                    dsg_data = copy_xy(dlat, dsg_data)
                    dsg_data = drop_coords(dsg_data)
                    # FIX: `ds_siconc` was never assigned on this path, so the original
                    # raised NameError or reused the previous model's sea-ice data.
                    # NOTE(review): dropping the coords mirrors the other
                    # newxy_fmissingxy branch above -- confirm this is what
                    # create_new_ds expects for CAS-ESM2-0.
                    ds_siconc = drop_coords(icedata)
                    new_ds = create_new_ds(ds_siconc, dsg_data, dlatnew, dlonnew)

    if new_ds is None:
        # No rule above could pair this model's siconc with a cell-area grid
        # (shapes still differ, or the model is not covered by a special case).
        print("    [x] Skipped: no rule matched to combine siconc with cell area.")
        continue

    # Keep only the southern part, pull the data into memory, then pickle it.
    new_ds_south = new_ds.where(new_ds['newlat'] < southlat, drop=True)
    new_ds_south.load()
    savepickle(name, p_save, new_ds_south)
    print('    [*] Saved.')

In [None]:
# Tear down in reverse order of creation: the client first, then the cluster.
for _dask_handle in (client, cluster):
    _dask_handle.close()

In [None]:
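# Reference only: these lists are kept commented out and are not used by the code shown in this notebook.
# Judging by their names, they group models by the grid problem they have -- TODO confirm.
# list_not_complete_latlon = ['FGOALS-g3', 'CAS-ESM2-0', 'CIESM', 'TaiESM1', 'CESM2', 'CESM2-FV2', 'CESM2-WACCM', 'CESM2-WACCM-FV2', 'SAM0-UNICON', 'NorESM2-MM', 'NorESM2-LM']
# list_not_complete_latlon_area = ['CESM2-FV2']
# list_not_equal_coords = ['NESM3', 'CMCC-CM2-SR5', 'CMCC-ESM2', 'NorESM2-MM', 'NorESM2-LM']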

Some exceptions (models whose siconc and areacello arrays have different dimensions):

8 NESM3
 
    Dimensions (siconc): ('time', 'nj', 'ni') : (1200, 384, 320)
    
    Dimensions (areacello): ('nj', 'ni') : (292, 362)

12 CMCC-CM2-SR5
    
    Dimensions (siconc): ('time', 'i', 'j') : (3000, 291, 360)
    
    Dimensions (areacello): ('i', 'j') : (292, 362)

13 CMCC-ESM2
    
    Dimensions (siconc): ('time', 'i', 'j') : (3000, 291, 360)
    
    Dimensions (areacello): ('i', 'j') : (292, 362)

46 NorESM2-MM
    
    Dimensions (siconc): ('time', 'j', 'i') : (120, 384, 360)

    Dimensions (areacello): ('j', 'i') : (385, 360)

47 NorESM2-LM
    
    Dimensions (siconc): ('time', 'j', 'i') : (120, 384, 360)
    
    Dimensions (areacello): ('j', 'i') : (385, 360)

Note: why some models are not included

'NorESM1-F': no temperature or salinity data, and no mlotst on the CMIP6 site

'INM-CM5-0': has temperature data but no salinity data (only global salinity), and no mlotst data

'AWI-CM-1-1-MR': no monthly siconc data

'FIO-ESM-2-0': no salinity data

--

Other notes

'IPSL-CM6A-MR1': has area data in the thetao dataset
