In [None]:
# Silence every Python warning for the rest of the session: calling
# filterwarnings("ignore") without message/category/module arguments installs
# a blanket "ignore" filter, not a filter for "some" messages only.
import warnings
warnings.filterwarnings("ignore")

In [None]:
import numpy as np
import xarray as xr
import pandas as pd

import gsw
import copy
import os
import glob
import pickle

In [None]:
# Per-model metadata table. Later cells select rows by 'source_id' and read the
# columns 'zname', 'latname', 'lonname', 'xname' and 'yname' (names used to index
# the opened datasets) as well as 'member_id', 'grid_label', 'variable_id' and
# 'zstore'.
datapd = pd.read_csv("all_new.csv")

In [None]:
import zarr
import gcsfs
# this only needs to be created once
# Google Cloud Storage filesystem handle created with token='anon'; later cells
# pass gcs.get_mapper(<store url>) to xr.open_zarr.
gcs = gcsfs.GCSFileSystem(token='anon')

In [None]:
# Root directory of the local CMIP6 model output. The processing cells list it
# with os.listdir and compare each entry name with a model id / datapd['source_id'].
# Note the trailing slash: the cells build paths as path_data + '/' + f, which
# therefore contain a double slash.
path_data = '../../../data/model/CMIP6/'

In [None]:
def cal_mld(sigma0, lev):
    """Estimate the mixed-layer depth from a density field with a 0.03 threshold.

    The reference density is `sigma0` interpolated to the coordinate value 10
    along `lev`. The result is the `lev` value at which the density reaches
    reference + 0.03, obtained by linear interpolation between the deepest
    level still within the threshold and the next level below it.

    Parameters
    ----------
    sigma0 : xarray.DataArray
        Density field with a vertical dimension named `lev`. In this notebook
        it is the output of gsw.sigma0. (Presumably kg m-3 with the vertical
        coordinate in metres, positive downward -- TODO confirm.)
    lev : str
        Name of the vertical dimension/coordinate of `sigma0`.

    Returns
    -------
    xarray.DataArray
        `sigma0` reduced over `lev`. Wherever the deepest within-threshold
        level is not shallower than the deepest level holding valid data, the
        latter is returned instead of the interpolated value.
    """
    depth = sigma0[lev]

    # Largest coordinate value that still carries non-missing data.
    b0 = depth.where(sigma0.notnull()).max(dim=lev)

    # Reference density at coordinate value 10.
    ref_density = sigma0.interp({lev: 10})

    # Deepest level whose density excess over the reference is at most 0.03 ...
    within_threshold = (sigma0 - ref_density) <= 0.03
    upper = depth.where(within_threshold).max(dim=lev)
    # ... and the first level below it.
    lower = depth.where(depth > upper).min(dim=lev)

    # Density range spanned by the two bracketing levels.
    bracket = sigma0.where((depth >= upper) & (depth <= lower))
    rho_lo = bracket.min(dim=lev)
    rho_hi = bracket.max(dim=lev)

    # Linear interpolation between the bracketing levels to the threshold density.
    slope = (lower - upper) / (rho_hi - rho_lo)
    interpolated = slope * (ref_density + 0.03 - rho_lo) + upper

    return xr.where(upper < b0, interpolated, b0)

In [None]:
from dask.distributed import Client
from dask_jobqueue import SLURMCluster

# Options handed to SLURMCluster for the jobs it submits.
slurm_job_options = dict(
    memory='8G',
    processes=1,
    cores=2,
    nanny=True,
    silence_logs='error',
)
cluster = SLURMCluster(**slurm_job_options)

# Request a scale of 32 and attach a client to the cluster; the bare `client`
# expression at the end displays the client summary as the cell output.
cluster.scale(32)
client = Client(cluster)
client

In [None]:
# Mixed-layer-depth (MLD) statistics for GISS-E2-2-H, computed from all so/thetao
# files at once and written to data_mld/<model>.pickle.
# NOTE(review): gsw.sigma0 is documented as taking Absolute Salinity and
# Conservative Temperature; confirm that passing so/thetao directly is intended.
for f in os.listdir(path_data):
    # Every directory entry except the target model is skipped.
    if f != 'GISS-E2-2-H':
        continue
    new_path = f"{path_data}/{f}"
    fpath = f"data_mld/{f}.pickle"

    # Open all salinity (so) and temperature (thetao) files of the model.
    sf = f"{new_path}/so*.nc"
    tf = f"{new_path}/thetao*.nc"
    dss = xr.open_mfdataset(sf)
    dst = xr.open_mfdataset(tf)
    das, dat = dss.so, dst.thetao

    # Drop the 'type' coordinate from both fields when the salinity field has it.
    if 'type' in das.coords:
        das = das.reset_coords('type', drop=True)
        dat = dat.reset_coords('type', drop=True)

    # Non-positive salinity and exactly-zero temperature are masked out before
    # the density computation.
    da_sigma0 = gsw.sigma0(das.where(das > 0), dat.where(dat != 0))

    # Metadata of this model: looked up once and reused below.
    model_rows = datapd.loc[datapd['source_id'] == f]
    levname = model_rows['zname'].values[0]
    model_info = model_rows.iloc[0]

    if das[levname].attrs.get('units') == 'centimeters':
        da_sigma0[levname] = da_sigma0[levname]/100 # unit: cm --> m

    if pd.isna(model_info['latname']):
        if pd.isna(model_info['xname']):
            print("{} doesn't have regular grid.".format(f))
            continue
        # No 'latname' entry: select by label on the 'yname' coordinate and
        # build plotting coordinates with meshgrid.
        da_south = da_sigma0.sel({model_info['yname']: slice(-90, -50)})
        pltx0 = da_south[model_info['xname']]
        plty0 = da_south[model_info['yname']]
        pltx, plty = np.meshgrid(pltx0, plty0)
    else:
        # 'latname' is given: mask on that coordinate and drop all-masked
        # rows/columns.
        lat_key = model_info['latname']
        da_south = da_sigma0.where(da_sigma0[lat_key] < -50, drop=True)
        pltx = da_south[model_info['lonname']].load()
        plty = da_south[lat_key].load()

    # Coordinates that carry a time axis are reduced to their first time step.
    if np.ndim(pltx) > 2:
        pltx = pltx.isel(time = 0)
        plty = plty.isel(time = 0)

    # Position 8 of the month groups is the ninth distinct month, i.e. September
    # only as long as months 1-8 all occur in the data.
    da_sep = list(da_south.groupby('time.month'))[8][1]
    da_mld = cal_mld(da_sep, levname)

    # Maximum MLD over time, and the fraction of time steps with MLD >= 2000
    # (points that never reach 2000 become NaN).
    mldmax = da_mld.max("time")
    mld2000 = da_mld.where(da_mld >= 2000).count('time')
    mld2000_frq = mld2000.where(mld2000 > 0) / da_mld.sizes['time']

    newd = dict(mldmax=mldmax.load(),
                mld2kfq=mld2000_frq.load(),
                pltx=pltx,
                plty=plty)

    with open(fpath, 'wb') as wf:
        pickle.dump(newd, wf, pickle.HIGHEST_PROTOCOL)

    print("{}: finished".format(f))
    break

In [None]:
# Chunked mixed-layer-depth (MLD) computation for IPSL-CM5A2-INCA.
#
# Every matching so/thetao file pair is split into 10 consecutive time slices;
# cal_mld is evaluated on one slice at a time and each result is pickled to
# data_mld0/<model>_<index>.pickle. A slice whose pickle already exists is
# skipped, so the cell can be re-run to resume an interrupted job.
#
# Fixes relative to the previous version of this cell:
#   * the 'type' coordinate is now dropped from the arrays that are actually
#     used (before, the check ran on `das`/`dat` left over from another cell
#     and its result was overwritten by the slicing below);
#   * the pickle index keeps counting across file pairs (before, every file
#     reused indices 0-9, so all files after the first were skipped by the
#     "already exists" test);
#   * an unmatched so/thetao pair is reported instead of being skipped silently.
for f in os.listdir(path_data):
    if f != 'IPSL-CM5A2-INCA':
        continue
    new_path = path_data + '/' + f
    all_files = os.listdir(new_path)
    s_files = sorted(file for file in all_files if file.startswith("so_Omon"))
    t_files = sorted(file for file in all_files if file.startswith("thetao_Omon"))

    # The metadata row of this model is loop-invariant: look it up once.
    model_rows = datapd.loc[datapd['source_id'] == f]
    model_info = model_rows.iloc[0]
    levname = model_rows['zname'].values[0]

    chunksize = {"time": 24,
                 model_info['xname']: 180,
                 model_info['yname']: 180}

    # fn = number of slice indices used up by the file pairs handled so far;
    # sfn = fn + nfn is the index of the current slice across all files.
    fn = 0
    for s_file, t_file in zip(s_files, t_files):
        s_file_path = os.path.join(new_path, s_file)
        t_file_path = os.path.join(new_path, t_file)

        # Everything after the variable name must agree between the two files.
        s_longname = s_file.split('_')[1:]
        t_longname = t_file.split('_')[1:]
        if s_longname != t_longname:
            print('skipping unmatched pair: {} / {}'.format(s_file, t_file))
            continue

        dss = xr.open_dataset(s_file_path, chunks=chunksize)
        dst = xr.open_dataset(t_file_path, chunks=chunksize)

        # 11 evenly spaced boundaries -> 10 time slices per file.
        fns = np.linspace(0, len(dss.so.time), 11)
        for nfn in range(0, len(fns)-1):
            sfn = fn + nfn
            savepath = 'data_mld0/' + f + '_' + str(sfn) + '.pickle'
            if os.path.exists(savepath):
                continue

            time_slice = slice(int(fns[nfn]), int(fns[nfn+1]))
            das = dss.so.isel(time = time_slice)
            dat = dst.thetao.isel(time = time_slice)

            # Checked per array so that a coordinate present on only one of the
            # two fields cannot make reset_coords raise.
            if 'type' in das.coords:
                das = das.reset_coords('type', drop = True)
            if 'type' in dat.coords:
                dat = dat.reset_coords('type', drop = True)

            # Non-positive salinity and exactly-zero temperature are masked out.
            da_sigma0 = gsw.sigma0(das.where(das > 0), dat.where(dat != 0))

            if das[levname].attrs.get('units') == 'centimeters':
                da_sigma0[levname] = da_sigma0[levname]/100 # unit: cm --> m

            if pd.isna(model_info['latname']):
                if pd.isna(model_info['xname']):
                    print("{} doesn't have regular grid.".format(f))
                    continue
                da_south = da_sigma0.sel({model_info['yname']: slice(-90, -50)})
                pltx0 = da_south[model_info['xname']]
                plty0 = da_south[model_info['yname']]
                pltx, plty = np.meshgrid(pltx0, plty0)
            else:
                da_south = da_sigma0.where(da_sigma0[model_info['latname']] < -50, drop=True)
                pltx = da_south[model_info['lonname']].load()
                plty = da_south[model_info['latname']].load()

            # Coordinates that carry a time axis are reduced to their first step.
            if len(np.shape(pltx)) > 2:
                pltx = pltx.isel(time = 0)
                plty = plty.isel(time = 0)

            # Position 8 of the month groups is September only as long as
            # months 1-8 all occur in this slice.
            da_sep = list(da_south.groupby('time.month'))[8][-1]
            da_mld = cal_mld(da_sep, levname)
            da_mld_new = da_mld.load()

            with open(savepath, 'wb') as wf:
                pickle.dump(da_mld_new, wf, pickle.HIGHEST_PROTOCOL)

        # Advance the running index by the number of slices of this file.
        fn += len(fns) - 1

        # else:
        #     print(s_longname)
        #     print(t_longname)
        #     break
            # if fn == 0:
            #     da_mld_all = da_mld_new.load()
            # else:
            #     da_mld_all = xr.concat([da_mld_all, da_mld_new], dim="time")
            # if fn > 2:
            #     break
            # fn = fn + 1
            
        

    # mldmax = da_mld.max("time")
    # mld2000 = da_mld.where(da_mld >= 2000).count('time')
    # mld2000_frq = mld2000.where(mld2000>0)/len(da_mld.time)
    
    # newd = {"mldmax":mldmax.load(), 
    #         "mld2kfq":mld2000_frq.load(), 
    #         "pltx":pltx, 
    #         "plty":plty}
    
    # with open(fpath, 'wb') as wf:
    #     pickle.dump(newd, wf, pickle.HIGHEST_PROTOCOL)
        
    # print("{}: finished".format(f))
            
            # break

In [None]:
# Chunked mixed-layer-depth (MLD) computation for CanESM5-1.
#
# Each so/thetao file pair is cut into slices of (about) 12 time steps; cal_mld
# is evaluated per slice and the result is pickled to
# data_mld0/<model>_<index>.pickle, where <index> keeps counting across files.
# Slices whose pickle already exists are skipped, so the cell can be resumed.
#
# Fixes relative to the previous version of this cell:
#   * the 'type' coordinate is dropped from the sliced arrays that are actually
#     used (before, reset_coords was applied to the whole Datasets and the
#     result was overwritten by the slicing, so nothing was ever dropped);
#   * the running index advances by the number of slices of the file instead of
#     `sfn + 1`, which used a stale or undefined `sfn` when a file yielded no
#     slice.
for f in os.listdir(path_data):
    if f != 'CanESM5-1':
        continue
    new_path = path_data + '/' + f
    all_files = os.listdir(new_path)
    s_files = sorted(file for file in all_files if file.startswith("so_Omon"))
    t_files = sorted(file for file in all_files if file.startswith("thetao_Omon"))

    # The metadata row of this model is loop-invariant: look it up once.
    model_rows = datapd.loc[datapd['source_id'] == f]
    model_info = model_rows.iloc[0]
    levname = model_rows['zname'].values[0]

    # fn = number of slice indices used up by the file pairs handled so far.
    fn = 0
    for s_file, t_file in zip(s_files, t_files):
        s_file_path = os.path.join(new_path, s_file)
        t_file_path = os.path.join(new_path, t_file)

        # Everything after the variable name must agree between the two files.
        s_longname = s_file.split('_')[1:]
        t_longname = t_file.split('_')[1:]
        if s_longname != t_longname:
            print('time span does not match ')
            break

        dss = xr.open_dataset(s_file_path)
        dst = xr.open_dataset(t_file_path)

        # One slice boundary every 12 time steps (len/12 + 1 boundaries).
        nfns = np.linspace(0, len(dss.so.time), int(len(dss.so.time)/12+1))

        for nfn in range(0, len(nfns)-1):
            sfn = fn + nfn
            savepath = 'data_mld0/' + f + '_' + str(sfn) + '.pickle'
            if os.path.exists(savepath):
                continue
            print('file {}, number {}, time {} - {}'.format(s_file, sfn, nfns[nfn], nfns[nfn+1]))

            time_slice = slice(int(nfns[nfn]), int(nfns[nfn+1]))
            das = dss.so.isel(time = time_slice)
            dat = dst.thetao.isel(time = time_slice)

            # Checked per array so that a coordinate present on only one of the
            # two fields cannot make reset_coords raise.
            if 'type' in das.coords:
                das = das.reset_coords('type', drop = True)
            if 'type' in dat.coords:
                dat = dat.reset_coords('type', drop = True)

            # Non-positive salinity and exactly-zero temperature are masked out.
            da_sigma0 = gsw.sigma0(das.where(das > 0), dat.where(dat != 0))

            if das[levname].attrs.get('units') == 'centimeters':
                da_sigma0[levname] = da_sigma0[levname]/100 # unit: cm --> m

            if pd.isna(model_info['latname']):
                if pd.isna(model_info['xname']):
                    print("{} doesn't have regular grid.".format(f))
                    continue
                da_south = da_sigma0.sel({model_info['yname']: slice(-90, -50)})
                pltx0 = da_south[model_info['xname']]
                plty0 = da_south[model_info['yname']]
                pltx, plty = np.meshgrid(pltx0, plty0)
            else:
                da_south = da_sigma0.where(da_sigma0[model_info['latname']] < -50, drop=True)
                pltx = da_south[model_info['lonname']].load()
                plty = da_south[model_info['latname']].load()

            # Coordinates that carry a time axis are reduced to their first step.
            if len(np.shape(pltx)) > 2:
                pltx = pltx.isel(time = 0)
                plty = plty.isel(time = 0)

            # Position 8 of the month groups is September only as long as
            # months 1-8 all occur in this slice.
            da_sep = list(da_south.groupby('time.month'))[8][-1]
            da_mld = cal_mld(da_sep, levname)
            da_mld_new = da_mld.load()

            with open(savepath, 'wb') as wf:
                pickle.dump(da_mld_new, wf, pickle.HIGHEST_PROTOCOL)

        # Same value as the former `sfn + 1` whenever the file produced at least
        # one slice, and still well defined when it produced none.
        fn += len(nfns) - 1

        # else:
        #     print(s_longname)
        #     print(t_longname)
        #     break
            # if fn == 0:
            #     da_mld_all = da_mld_new.load()
            # else:
            #     da_mld_all = xr.concat([da_mld_all, da_mld_new], dim="time")
            # if fn > 2:
            #     break
            # fn = fn + 1
            
        

    # mldmax = da_mld.max("time")
    # mld2000 = da_mld.where(da_mld >= 2000).count('time')
    # mld2000_frq = mld2000.where(mld2000>0)/len(da_mld.time)
    
    # newd = {"mldmax":mldmax.load(), 
    #         "mld2kfq":mld2000_frq.load(), 
    #         "pltx":pltx, 
    #         "plty":plty}
    
    # with open(fpath, 'wb') as wf:
    #     pickle.dump(newd, wf, pickle.HIGHEST_PROTOCOL)
        
    # print("{}: finished".format(f))
            
            # break

In [None]:
# Trial run for E3SM-2-0: the MLD is computed for the first so/thetao file pair
# only (both loops end with `break`) and nothing is written to disk; the result
# stays in the kernel as da_mld_new / da_sigma0 for the inspection cells below.
for f in os.listdir(path_data):
    if f != 'E3SM-2-0':
        continue
    new_path = f"{path_data}/{f}"
    all_files = os.listdir(new_path)
    s_files = sorted(file for file in all_files if file.startswith("so_Omon"))
    t_files = sorted(file for file in all_files if file.startswith("thetao_Omon"))

    fn = 0
    for s_file, t_file in zip(s_files, t_files):
        s_file_path = os.path.join(new_path, s_file)
        t_file_path = os.path.join(new_path, t_file)

        # Everything after the variable name must agree between the two files.
        s_longname = s_file.split('_')[1:]
        t_longname = t_file.split('_')[1:]
        if s_longname != t_longname:
            print('time span does not match ')
            break

        chunksize = {"time":12}
        dss = xr.open_dataset(s_file_path, chunks=chunksize)
        dst = xr.open_dataset(t_file_path, chunks=chunksize)

        model_rows = datapd.loc[datapd['source_id'] == f]
        levname = model_rows['zname'].values[0]

        # Two boundaries -> a single slice covering the whole file.
        nfns = np.linspace(0, len(dss.so.time), 2)

        for nfn in range(0, len(nfns)-1):
            sfn = fn + nfn
            # The path is still built, but the "already exists" skip and the
            # pickle dump are both switched off in this trial cell.
            savepath = f"data_mld0/{f}_{sfn}.pickle"
            print('file {}, number {}, time {} - {}'.format(s_file, sfn, nfns[nfn], nfns[nfn+1]))

            time_slice = slice(int(nfns[nfn]), int(nfns[nfn+1]))
            das = dss.so.isel(time = time_slice)
            dat = dst.thetao.isel(time = time_slice)

            # Non-positive salinity and exactly-zero temperature are masked out.
            da_sigma0 = gsw.sigma0(das.where(das > 0), dat.where(dat != 0))

            if das[levname].attrs.get('units') == 'centimeters':
                da_sigma0[levname] = da_sigma0[levname]/100 # unit: cm --> m

            model_info = model_rows.iloc[0]
            if pd.isna(model_info['latname']):
                if pd.isna(model_info['xname']):
                    print("{} doesn't have regular grid.".format(f))
                    continue
                da_south = da_sigma0.sel({model_info['yname']: slice(-90, -50)})
                pltx0 = da_south[model_info['xname']]
                plty0 = da_south[model_info['yname']]
                pltx, plty = np.meshgrid(pltx0, plty0)
            else:
                lat_key = model_info['latname']
                da_south = da_sigma0.where(da_sigma0[lat_key] < -50, drop=True)
                pltx = da_south[model_info['lonname']].load()
                plty = da_south[lat_key].load()

            # Coordinates that carry a time axis are reduced to their first step.
            if np.ndim(pltx) > 2:
                pltx = pltx.isel(time = 0)
                plty = plty.isel(time = 0)

            # Position 8 of the month groups is September only as long as
            # months 1-8 all occur in the data.
            da_sep = list(da_south.groupby('time.month'))[8][1]
            da_mld = cal_mld(da_sep, levname)
            da_mld_new = da_mld.load()
            break

        fn = sfn + 1
        break
        


In [None]:
# Quick look at the first time step of the most recently computed MLD field
# (da_mld_new is whatever the last executed processing cell left in the kernel).
da_mld_new.isel(time=0).plot()

In [None]:
# Show the first time step at the first vertical level of the density field left
# by the last processing cell. The vertical dimension is addressed through
# `levname` (set by that same cell from datapd['zname']) instead of a hard-coded
# 'lev', so the cell also works for models whose vertical dimension has another
# name.
da_sigma0.isel({'time': 0, levname: 0})

In [None]:
# Model id used by the next cells to build the data_mld0/<model>_<n>.pickle input
# names and the data_mld/<model>.pickle output name.
model1 = 'E3SM-2-0'

In [None]:
# Display the last chunk index assigned by the most recently executed chunked
# processing cell; the next cell loads pickles 0..sfn.
sfn

In [None]:
# Reassemble the per-chunk MLD pickles 0..sfn of `model1` into a single array
# along "time".
# NOTE: pickle.load can execute arbitrary code stored in the file -- only load
# pickles that were written by this notebook.
mld_chunks = []
for newfn in range(0, sfn+1):
    picklefilename = 'data_mld0/' + model1 + '_' + str(newfn) + '.pickle'
    with open(picklefilename, 'rb') as pf:
        mld_data0 = pickle.load(pf)
    mld_chunks.append(mld_data0)

# Fail loudly instead of silently leaving a stale da_mld_all in the kernel.
if not mld_chunks:
    raise ValueError('no chunk pickles to load for ' + model1)

# One concat over the whole list avoids re-copying the accumulated array on
# every iteration; a single chunk is used as is, exactly as before.
if len(mld_chunks) == 1:
    da_mld_all = mld_chunks[0]
else:
    da_mld_all = xr.concat(mld_chunks, dim="time")


In [None]:
# Summary statistics over the complete, concatenated MLD series of `model1`.
# All three statistics are derived from da_mld_all; the earlier version took the
# count and the denominator from `da_mld`, i.e. only from the last chunk that a
# processing cell happened to leave in the kernel.
mldmax = da_mld_all.max("time")

# Number of time steps with MLD >= 2000, turned into a fraction of all time
# steps; points that never reach 2000 become NaN rather than 0.
mld2000 = da_mld_all.where(da_mld_all >= 2000).count('time')
mld2000_frq = mld2000.where(mld2000>0)/len(da_mld_all.time)

# pltx / plty are the plotting coordinates left behind by the processing cell.
newd = {"mldmax":mldmax,
        "mld2kfq":mld2000_frq,
        "pltx":pltx,
        "plty":plty}

datasavepath = 'data_mld/' + model1 + '.pickle'
with open(datasavepath, 'wb') as wf:
    pickle.dump(newd, wf, pickle.HIGHEST_PROTOCOL)

In [None]:
# Switch the target model: the following cells process the directory whose name
# equals model1 and build their pickle file names from it.
model1 = 'CanESM5-1'

In [None]:
# Per-file mixed-layer-depth (MLD) computation for the model named in `model1`:
# one pickle (data_mld0/<model>_<n>.pickle) per so/thetao file pair, where n is
# the position of the pair. Pairs whose pickle already exists are skipped.
for f in os.listdir(path_data):
    if f != model1:
        continue
    new_path = f"{path_data}/{f}"
    all_files = os.listdir(new_path)
    s_files = sorted(file for file in all_files if file.startswith("so_Omon"))
    t_files = sorted(file for file in all_files if file.startswith("thetao_Omon"))

    fn = 0
    for s_file, t_file in zip(s_files, t_files):
        savepath = f"data_mld0/{f}_{fn}.pickle"
        if os.path.exists(savepath):
            # Already done in an earlier run: count it and move on.
            fn += 1
            continue

        s_file_path = os.path.join(new_path, s_file)
        t_file_path = os.path.join(new_path, t_file)

        # Everything after the variable name must agree between the two files;
        # on a mismatch both name tails are printed and processing stops.
        s_longname = s_file.split('_')[1:]
        t_longname = t_file.split('_')[1:]
        if s_longname != t_longname:
            print(s_longname)
            print(t_longname)
            break

        chunksize = {"time":12}
        dss = xr.open_dataset(s_file_path, chunks=chunksize)
        dst = xr.open_dataset(t_file_path, chunks=chunksize)
        das, dat = dss.so, dst.thetao

        # Drop the 'type' coordinate from both fields when salinity has it.
        if 'type' in das.coords:
            das = das.reset_coords('type', drop = True)
            dat = dat.reset_coords('type', drop = True)

        # Non-positive salinity and exactly-zero temperature are masked out.
        da_sigma0 = gsw.sigma0(das.where(das > 0), dat.where(dat != 0))

        # Metadata of this model: looked up once and reused below.
        model_rows = datapd.loc[datapd['source_id'] == f]
        levname = model_rows['zname'].values[0]
        if das[levname].attrs.get('units') == 'centimeters':
            da_sigma0[levname] = da_sigma0[levname]/100 # unit: cm --> m

        model_info = model_rows.iloc[0]
        if pd.isna(model_info['latname']):
            if pd.isna(model_info['xname']):
                print("{} doesn't have regular grid.".format(f))
                continue
            da_south = da_sigma0.sel({model_info['yname']: slice(-90, -50)})
            pltx0 = da_south[model_info['xname']]
            plty0 = da_south[model_info['yname']]
            pltx, plty = np.meshgrid(pltx0, plty0)
        else:
            lat_key = model_info['latname']
            da_south = da_sigma0.where(da_sigma0[lat_key] < -50, drop=True)
            pltx = da_south[model_info['lonname']].load()
            plty = da_south[lat_key].load()

        # Coordinates that carry a time axis are reduced to their first step.
        if np.ndim(pltx) > 2:
            pltx = pltx.isel(time = 0)
            plty = plty.isel(time = 0)

        # Position 8 of the month groups is September only as long as months
        # 1-8 all occur in the file.
        da_sep = list(da_south.groupby('time.month'))[8][1]
        da_mld = cal_mld(da_sep, levname)
        da_mld_new = da_mld.load()

        with open(savepath, 'wb') as wf:
            pickle.dump(da_mld_new, wf, pickle.HIGHEST_PROTOCOL)
        fn += 1
            # if fn == 0:
            #     da_mld_all = da_mld_new.load()
            # else:
            #     da_mld_all = xr.concat([da_mld_all, da_mld_new], dim="time")
            # if fn > 2:
            #     break
            # fn = fn + 1
            
        

    # mldmax = da_mld.max("time")
    # mld2000 = da_mld.where(da_mld >= 2000).count('time')
    # mld2000_frq = mld2000.where(mld2000>0)/len(da_mld.time)
    
    # newd = {"mldmax":mldmax.load(), 
    #         "mld2kfq":mld2000_frq.load(), 
    #         "pltx":pltx, 
    #         "plty":plty}
    
    # with open(fpath, 'wb') as wf:
    #     pickle.dump(newd, wf, pickle.HIGHEST_PROTOCOL)
        
    # print("{}: finished".format(f))
            
            # break

In [None]:
# Reassemble the per-file MLD pickles 0..fn-1 of `model1` into a single array
# along "time".
# NOTE: pickle.load can execute arbitrary code stored in the file -- only load
# pickles that were written by this notebook.
mld_chunks = []
for newfn in range(0, fn):
    picklefilename = 'data_mld0/' + model1 + '_' + str(newfn) + '.pickle'
    with open(picklefilename, 'rb') as pf:
        mld_data0 = pickle.load(pf)
    mld_chunks.append(mld_data0)

# Fail loudly instead of silently leaving a stale da_mld_all in the kernel
# (fn == 0 means the processing cell handled no file).
if not mld_chunks:
    raise ValueError('no chunk pickles to load for ' + model1)

# One concat over the whole list avoids re-copying the accumulated array on
# every iteration; a single chunk is used as is, exactly as before.
if len(mld_chunks) == 1:
    da_mld_all = mld_chunks[0]
else:
    da_mld_all = xr.concat(mld_chunks, dim="time")


In [None]:
# Summary statistics over the complete, concatenated MLD series of `model1`.
# All three statistics are derived from da_mld_all; the earlier version took the
# count and the denominator from `da_mld`, i.e. only from the last file that the
# processing cell happened to leave in the kernel.
mldmax = da_mld_all.max("time")

# Number of time steps with MLD >= 2000, turned into a fraction of all time
# steps; points that never reach 2000 become NaN rather than 0.
mld2000 = da_mld_all.where(da_mld_all >= 2000).count('time')
mld2000_frq = mld2000.where(mld2000>0)/len(da_mld_all.time)

# pltx / plty are the plotting coordinates left behind by the processing cell.
newd = {"mldmax":mldmax,
        "mld2kfq":mld2000_frq,
        "pltx":pltx,
        "plty":plty}

datasavepath = 'data_mld/' + model1 + '.pickle'
with open(datasavepath, 'wb') as wf:
    pickle.dump(newd, wf, pickle.HIGHEST_PROTOCOL)

In [None]:
# Inspect the salinity variable of the dataset most recently opened by a
# processing cell (dss is leftover kernel state from that cell).
dss.so

In [None]:
# Mixed-layer-depth (MLD) statistics for GISS-E2-2-H, computed from all so/thetao
# files at once and written to data_mld/<model>.pickle.
# NOTE(review): an identical GISS-E2-2-H cell appears earlier in this notebook;
# one of the two can probably be removed.
for f in os.listdir(path_data):
    # Every directory entry except the target model is skipped.
    if f != 'GISS-E2-2-H':
        continue
    new_path = f"{path_data}/{f}"
    fpath = f"data_mld/{f}.pickle"

    # Open all salinity (so) and temperature (thetao) files of the model.
    sf = f"{new_path}/so*.nc"
    tf = f"{new_path}/thetao*.nc"
    dss = xr.open_mfdataset(sf)
    dst = xr.open_mfdataset(tf)
    das, dat = dss.so, dst.thetao

    # Drop the 'type' coordinate from both fields when the salinity field has it.
    if 'type' in das.coords:
        das = das.reset_coords('type', drop=True)
        dat = dat.reset_coords('type', drop=True)

    # Non-positive salinity and exactly-zero temperature are masked out before
    # the density computation.
    da_sigma0 = gsw.sigma0(das.where(das > 0), dat.where(dat != 0))

    # Metadata of this model: looked up once and reused below.
    model_rows = datapd.loc[datapd['source_id'] == f]
    levname = model_rows['zname'].values[0]
    model_info = model_rows.iloc[0]

    if das[levname].attrs.get('units') == 'centimeters':
        da_sigma0[levname] = da_sigma0[levname]/100 # unit: cm --> m

    if pd.isna(model_info['latname']):
        if pd.isna(model_info['xname']):
            print("{} doesn't have regular grid.".format(f))
            continue
        # No 'latname' entry: select by label on the 'yname' coordinate and
        # build plotting coordinates with meshgrid.
        da_south = da_sigma0.sel({model_info['yname']: slice(-90, -50)})
        pltx0 = da_south[model_info['xname']]
        plty0 = da_south[model_info['yname']]
        pltx, plty = np.meshgrid(pltx0, plty0)
    else:
        # 'latname' is given: mask on that coordinate and drop all-masked
        # rows/columns.
        lat_key = model_info['latname']
        da_south = da_sigma0.where(da_sigma0[lat_key] < -50, drop=True)
        pltx = da_south[model_info['lonname']].load()
        plty = da_south[lat_key].load()

    # Coordinates that carry a time axis are reduced to their first time step.
    if np.ndim(pltx) > 2:
        pltx = pltx.isel(time = 0)
        plty = plty.isel(time = 0)

    # Position 8 of the month groups is the ninth distinct month, i.e. September
    # only as long as months 1-8 all occur in the data.
    da_sep = list(da_south.groupby('time.month'))[8][1]
    da_mld = cal_mld(da_sep, levname)

    # Maximum MLD over time, and the fraction of time steps with MLD >= 2000
    # (points that never reach 2000 become NaN).
    mldmax = da_mld.max("time")
    mld2000 = da_mld.where(da_mld >= 2000).count('time')
    mld2000_frq = mld2000.where(mld2000 > 0) / da_mld.sizes['time']

    newd = dict(mldmax=mldmax.load(),
                mld2kfq=mld2000_frq.load(),
                pltx=pltx,
                plty=plty)

    with open(fpath, 'wb') as wf:
        pickle.dump(newd, wf, pickle.HIGHEST_PROTOCOL)

    print("{}: finished".format(f))
    break

In [None]:
# Show the 'xname' metadata entry of the model held in `f`, i.e. the last
# directory entry visited by the most recently executed processing loop.
datapd.loc[datapd['source_id'] == f].iloc[0]['xname']

In [None]:
# Open the Pangeo CMIP6 catalog (JSON descriptor on Google Cloud Storage) as an
# ESM datastore; `col` is searched in the next cells.
from intake import open_esm_datastore
col = open_esm_datastore("https://storage.googleapis.com/cmip6/pangeo-cmip6.json")

In [None]:
# Restrict the catalog to the piControl entries of thetao and so.
cat = col.search(variable_id = ['thetao', 'so'], experiment_id = 'piControl')
# Distinct model ids among the matches.
nlist = cat.df.source_id.unique()
# Whatever iterating the search result yields (presumably the dataset keys --
# TODO confirm).
collist = list(cat)

In [None]:
# Save the table of matching catalog entries to dataset_st_raw.csv.
cat.df.to_csv("dataset_st_raw.csv")

In [None]:
# Display the metadata table loaded from all_new.csv.
datapd

In [None]:
# Narrow the search to a single model (EC-Earth3-LR) and display the matching
# catalog rows. This rebinds `cat`.
cat = col.search(variable_id = ['thetao', 'so'], experiment_id = 'piControl', source_id = 'EC-Earth3-LR')
cat.df

In [None]:
# Store location ('zstore' column) of the catalog row whose index label is 1.
cat.df.zstore[1]

In [None]:
# Open one hard-coded zarr store (NESM3 piControl SImon siconc, per the path)
# through the anonymous GCS mapper, using consolidated metadata.
xr.open_zarr(gcs.get_mapper("gs://cmip6/CMIP6/CMIP/NUIST/NESM3/piControl/r1i1p1f1/SImon/siconc/gn/v20190704/"), consolidated=True)

In [None]:
# Open the zarr store referenced by the catalog row with index label 1 of the
# current search result.
xr.open_zarr(gcs.get_mapper(cat.df.zstore[1]), consolidated=True)

In [None]:
# Switch to the LEAP test catalog and repeat the piControl thetao/so search.
# This rebinds both `col` and `cat`, replacing the Pangeo catalog objects
# created in the cells above.
import intake
col = intake.open_esm_datastore(
    "https://storage.googleapis.com/leap-persistent-ro/data-library/catalogs/cmip6-test/leap-pangeo-cmip6-test.json"
)
cat = col.search(variable_id = ['thetao', 'so'], experiment_id = 'piControl')

In [None]:
# Display the rows of the current search result.
cat.df

In [None]:
# Re-open the same LEAP test catalog URL as the earlier cell; `cat` is not
# refreshed here and still holds the previous search result.
col = intake.open_esm_datastore(
    "https://storage.googleapis.com/leap-persistent-ro/data-library/catalogs/cmip6-test/leap-pangeo-cmip6-test.json"
)

In [None]:
# Look up one entry of the current search result by its key and call to_dask()
# on it (presumably returning a lazily loaded dataset -- TODO confirm).
cat['CMIP.EC-Earth-Consortium.EC-Earth3-LR.piControl.Omon.gn'].to_dask()

In [None]:
# Walk through datapd two rows at a time (rows 2*i and 2*i+1). When both rows
# agree on source_id, member_id and grid_label, the zarr store of the first row
# is opened and its variable printed; otherwise the pair number i is printed.
# The store of the second row of a pair is not opened.
pair_columns = ('source_id', 'member_id', 'grid_label')
for i in range(len(datapd) // 2):
    row_a = 2*i
    row_b = row_a + 1

    # Compared in the same order as before, stopping at the first mismatch.
    rows_match = all(datapd.at[row_a, key] == datapd.at[row_b, key]
                     for key in pair_columns)
    if not rows_match:
        print(i)
        continue

    ds1 = xr.open_zarr(gcs.get_mapper(datapd.at[row_a,'zstore']), consolidated=True)
    print(datapd.at[row_a, 'source_id'], ' ', datapd.at[row_a, 'grid_label'])
    print(ds1[datapd.at[row_a, 'variable_id']], '\n')
