In [1]:
# Silence warnings for the rest of the session. filterwarnings("ignore") is
# called without a category or module filter, so it applies to every warning.
import warnings
warnings.filterwarnings("ignore")

In [2]:
import numpy as np
import xarray as xr
import pandas as pd

import os
import glob
import pickle
import gcsfs
import gc

In [3]:
# Table that the loops below walk row by row (positions 0..len-1). They read
# the columns 'source_id', 'xname', 'yname', 'latname', 'lonname' and
# 'zstore_mlotst' from it.
datapd = pd.read_csv('List_model.csv')

In [4]:
def ispickleexists(n, p0):
    """Tell whether the pickle file for ``n`` is already on disk.

    The path checked is the plain concatenation ``p0 + n + '.pickle'``; no
    path separator is inserted, so ``p0`` has to end with one if it is a
    directory.

    Returns
    -------
    bool
        True if that path exists, False otherwise.
    """
    return os.path.exists(p0 + n + '.pickle')

def openpickle(n, p0):
    """Load and return the object stored in ``p0 + n + '.pickle'``.

    Reading is delegated to ``pandas.read_pickle``. The path is built by plain
    string concatenation, so ``p0`` must already end with a separator.
    """
    # NOTE: unpickling executes code embedded in the file; only open pickles
    # from a trusted source.
    return pd.read_pickle(p0 + n + '.pickle')

def savepickle(n, p0, sf):
    """Pickle ``sf`` into the file ``p0 + n + '.pickle'``.

    The file is opened in binary write mode (an existing file is overwritten)
    and the object is serialised with ``pickle.HIGHEST_PROTOCOL``. Nothing is
    returned.
    """
    with open(p0 + n + '.pickle', 'wb') as sink:
        pickle.dump(sf, sink, protocol=pickle.HIGHEST_PROTOCOL)

def open_from_cloud(link):
    """Open the Zarr store at ``link`` on Google Cloud Storage with xarray.

    The bucket is accessed with anonymous credentials (``token='anon'``) and
    the store is opened with ``consolidated=True``.

    Returns whatever ``xr.open_zarr`` returns for that store.
    """
    store = gcsfs.GCSFileSystem(token='anon').get_mapper(link)
    return xr.open_zarr(store, consolidated=True)

def open_nc(mf):
    """Open NetCDF file(s) with ``use_cftime=True`` and return one dataset.

    Parameters
    ----------
    mf : str, os.PathLike, or sequence of paths
        A single path / glob pattern, or a collection of file paths.

    Returns
    -------
    The dataset produced by ``xr.open_mfdataset`` (up to 50 files, or a single
    path) or by concatenating the per-file datasets along ``"time"`` (more
    than 50 files).

    Notes
    -----
    Why collections of more than 50 files are opened one by one instead of in
    a single ``open_mfdataset`` call is not visible here - presumably that call
    was too slow or failed for very long file lists; confirm before changing
    the threshold.
    """
    # A lone path or glob pattern goes straight to xarray. Without this guard
    # a path string longer than 50 characters would enter the branch below and
    # be iterated character by character.
    if isinstance(mf, (str, os.PathLike)):
        return xr.open_mfdataset(mf, use_cftime=True)

    if len(mf) > 50:
        # Sort first: the pieces are joined in list order, and lists produced
        # by glob.glob come back in no guaranteed order.
        # Join with a single concat call instead of re-concatenating the
        # growing dataset once per file.
        pieces = [xr.open_mfdataset(path, use_cftime=True) for path in sorted(mf)]
        return xr.concat(pieces, dim="time")

    return xr.open_mfdataset(mf, use_cftime=True)

def select_month(da, n):
    """Keep only the time steps of ``da`` whose calendar month equals ``n``.

    ``da`` must expose ``da.time.dt.month`` and ``da.isel(time=...)``; the
    boolean month mask is passed to ``isel`` as the ``time`` indexer.
    """
    in_month = da.time.dt.month == n
    return da.isel(time=in_month)

def skip_unstructure_grid(datapd, i):
    """Report whether row ``i`` of ``datapd`` has no ``'xname'`` entry.

    A missing (NaN/None) ``'xname'`` is taken to mean the model has no regular
    grid: a message is printed and True is returned so the caller can skip the
    row. Otherwise False is returned and nothing is printed.
    """
    if not pd.isna(datapd.at[i, 'xname']):
        return False
    print("    [x] doesn't have regular grid.")
    return True

def newxy_fmissingxy(dx, dy):
    """Fill missing entries of 2-D x/y coordinate arrays from complete lines.

    Values of ``dx`` outside (-361, 361) and of ``dy`` outside (-91, 91) are
    first turned into NaN. The first row of ``dx`` that has no NaN and the
    first column of ``dy`` that has no NaN are then expanded with
    ``np.meshgrid``, and every NaN position is filled from that mesh.

    Parameters
    ----------
    dx, dy : 2-D array-like objects providing a ``.where(cond)`` method
        (the caller in this notebook passes loaded xarray variables).

    Returns
    -------
    tuple
        ``(x, y)`` as produced by ``np.where``.

    The ``[0]`` lookups fail if ``dx`` has no complete row or ``dy`` has no
    complete column.
    """
    lon = dx.where((dx > -361) & (dx < 361))
    lat = dy.where((dy > -91) & (dy < 91))

    lon_missing = np.isnan(lon)
    lat_missing = np.isnan(lat)

    # Rows of lon (axis 0) / columns of lat (axis 1) without any gap.
    complete_rows = ~lon_missing.any(axis=1)
    complete_cols = ~lat_missing.any(axis=0)

    lon_line = lon[complete_rows][0]
    lat_line = lat[:, complete_cols][:, 0]
    lon_mesh, lat_mesh = np.meshgrid(lon_line, lat_line)

    filled_x = np.where(lon_missing, lon_mesh, lon)
    filled_y = np.where(lat_missing, lat_mesh, lat)
    return filled_x, filled_y

In [6]:
# For every row of `datapd`: open the 'mlotst' data (cloud Zarr store when
# 'zstore_mlotst' is set, local NetCDF files otherwise), keep month 9, cut out
# the part with latitude below -50 and pickle the result under `p0`.
p_nc = '../../../data/model/CMIP6/'   # directory searched for local NetCDF files
p0 = '../../SO_data/data_mlotst/'     # prefix of the output pickles

for i in range(len(datapd)):
    name = datapd.at[i, 'source_id']
    print("{} {}".format(i, name))

    # Rows without an 'xname' entry are skipped.
    if skip_unstructure_grid(datapd, i):
        continue
    # Output that already exists is never recomputed.
    if ispickleexists(name, p0):
        print("    [o] existed.")
        continue

    # ---- open the dataset ------------------------------------------------
    if not pd.isna(datapd.at[i, 'zstore_mlotst']):
        ds = open_from_cloud(datapd.at[i, 'zstore_mlotst'])
    else:
        data_path = p_nc + 'mlotst_Omon_' + name + '_piControl_' + '*' + '.nc'
        matching_files = glob.glob(data_path)
        if not matching_files:
            print("    [x] no data.")
            continue
        ds = open_nc(matching_files)

    ds = select_month(ds, 9)

    # ---- cut out the southern part -----------------------------------------
    has_latlon_vars = not pd.isna(datapd.at[i, 'latname'])
    if has_latlon_vars and (name != 'CAS-ESM2-0'):
        # Mask with the lat/lon variables named in the table.
        latname = datapd.at[i, 'latname']
        lonname = datapd.at[i, 'lonname']
        dlat = ds[latname].load()
        dlon = ds[lonname].load()
        # Out-of-range values become NaN.
        dlat = dlat.where((dlat <= 90) & (dlat >= -90))
        dlon = dlon.where((dlon <= 360) & (dlon >= -360))
        if np.isnan(dlat).any() or np.isnan(dlon).any():
            print("        Not complete lat/lon coordinates")
            # Rebuild the gaps from complete rows/columns and write the
            # repaired coordinates back into the dataset.
            x, y = newxy_fmissingxy(dlon, dlat)
            ds[latname] = (ds[latname].dims, y)
            ds[lonname] = (ds[lonname].dims, x)
            dlat = ds[latname]
        da_south = ds.mlotst.where(dlat < -50, drop=True)
    else:
        # Label-based slice along the y dimension; CAS-ESM2-0 always uses
        # 'lat' regardless of what the table says.
        yname = 'lat' if name == 'CAS-ESM2-0' else datapd.at[i, 'yname']
        da_south = ds.mlotst.sel({yname: slice(-90, -50)})

    # Pull the values into memory before pickling.
    da_south = da_south.load()
    savepickle(name, p0, da_south)
    print("    [*] finished.")
    gc.collect()

0 BCC-CSM2-MR
    [o] existed.
1 BCC-ESM1
    [o] existed.
2 CAMS-CSM1-0
    [o] existed.
3 ACCESS-ESM1-5
    [o] existed.
4 ACCESS-CM2
    [o] existed.
5 GFDL-CM4
    [o] existed.
6 GFDL-ESM4
    [o] existed.
7 KIOST-ESM
    [o] existed.
8 NESM3
    [o] existed.
9 CanESM5
    [o] existed.
10 CanESM5-1
    [o] existed.
11 CanESM5-CanOE
    [o] existed.
12 CMCC-CM2-SR5
    [o] existed.
13 CMCC-ESM2
    [o] existed.
14 EC-Earth3
    [o] existed.
15 EC-Earth3-CC
    [o] existed.
16 EC-Earth3-LR
    [o] existed.
17 EC-Earth3-Veg
    [o] existed.
18 EC-Earth3-Veg-LR
    [o] existed.
19 EC-Earth3-AerChem
    [o] existed.
20 CNRM-CM6-1
    [o] existed.
21 CNRM-CM6-1-HR
    [o] existed.
22 CNRM-ESM2-1
    [o] existed.
23 HadGEM3-GC31-LL
    [o] existed.
24 HadGEM3-GC31-MM
    [o] existed.
25 UKESM1-0-LL
    [o] existed.
26 UKESM1-1-LL
    [o] existed.
27 IPSL-CM5A2-INCA
    [o] existed.
28 IPSL-CM6A-LR
    [o] existed.
29 IPSL-CM6A-MR1
    [o] existed.
30 MIROC6
    [o] existed.
31 MIROC-ES2L


In [16]:
# Metadata check: open the 'mlotst' data of every row of `datapd` (cloud Zarr
# store when 'zstore_mlotst' is set, local NetCDF files otherwise) and print
# the variable's 'comment' attribute when it has one. The month selection,
# southern cut-out and pickling done by the extraction loop are deliberately
# not run here.
p_nc = '../../../data/model/CMIP6/'
p0 = '../../SO_data/data_mlotst/'

for i in range(len(datapd)):
    name = datapd.at[i, 'source_id']
    print("{} {}".format(i, name))

    if not pd.isna(datapd.at[i, 'zstore_mlotst']):
        ds = open_from_cloud(datapd.at[i, 'zstore_mlotst'])
    else:
        data_path = p_nc + 'mlotst_Omon_' + name + '_piControl_' + '*' + '.nc'
        matching_files = glob.glob(data_path)
        if not matching_files:
            print("    [x] no data.")
            continue
        ds = open_nc(matching_files)

    mlotst_attrs = ds.mlotst.attrs
    if 'comment' in mlotst_attrs:
        print(mlotst_attrs['comment'])

0 CAMS-CSM1-0
Sigma T is potential density referenced to ocean surface.
1 BCC-CSM2-MR
Sigma T is potential density referenced to ocean surface.
2 BCC-ESM1
Sigma T is potential density referenced to ocean surface.
3 KIOST-ESM
Sigma T is potential density referenced to ocean surface.
4 ACCESS-ESM1-5
Sigma T is potential density referenced to ocean surface.
5 ACCESS-CM2
Sigma T is potential density referenced to ocean surface.
6 GFDL-CM4
Model data on the 1x1 grid includes values in all cells for which any ocean exists on the native grid. For mapping purposes, we recommend using a land mask such as World Ocean Atlas to cover these areas of partial land.  For calculating approximate integrals, we recommend multiplying by cell area (areacello).
7 GFDL-ESM4
8 TaiESM1
    [x] no data.
9 SAM0-UNICON
    [x] no data.
10 CIESM
    [x] no data.
11 CESM2
Sigma T is potential density referenced to ocean surface.
12 CESM2-FV2
HMXL_DR
13 CESM2-WACCM
Sigma T is potential density referenced to ocean su

In [17]:
# Show the full attribute dictionary of 'mlotst'. `ds` is not defined in this
# cell: it is the variable left over from a dataset-opening loop in an earlier
# cell, so the result depends on which dataset that loop assigned last.
ds.mlotst.attrs

{'cell_measures': 'area: areacello',
 'cell_methods': 'area: mean where sea time: mean',
 'comment': 'Sigma T is potential density referenced to ocean surface.',
 'history': '2020-01-31T10:33:00Z altered by CMOR: replaced missing value flag (-9.99e+33) and corresponding data with standard missing value (1e+20).',
 'long_name': 'Ocean Mixed Layer Thickness Defined by Sigma T',
 'standard_name': 'ocean_mixed_layer_thickness_defined_by_sigma_t',
 'units': 'm'}