In [1]:
# Silence warning output for the rest of the notebook.
# NOTE(review): the "ignore" action is installed without any category or
# module filter, so every warning is hidden, including deprecation notices
# raised by the libraries used in the cells below.
import warnings
warnings.filterwarnings("ignore")

In [2]:
import numpy as np
import xarray as xr
import pandas as pd

import os
import glob
import pickle

from pyproj import Geod

In [3]:
# Lookup table iterated row by row in the cells below. The columns read from it
# in this notebook are: source_id, member_id, grid_label (used to build the
# file-name patterns), latname / lonname and xname / yname (coordinate names;
# latname and xname are tested for NaN before use).
datapd = pd.read_csv('all_coord.csv')

In [20]:
# Diagnostic pass over every row of `datapd`.
#
# For each model it opens one siconc file and the matching areacello file(s),
# prints their variables / dimensions, and records the model name in:
#   * list_not_complete_latlon      - siconc lat/lon contain out-of-range values
#   * list_not_complete_latlon_area - areacello lat/lon contain out-of-range values
#   * list_not_equal_coords         - siconc and areacello horizontal shapes differ
#
# Datasets are opened with context managers so the file handles are released
# on every iteration, including the ones that end early with `continue`.
list_not_complete_latlon = []
list_not_complete_latlon_area = []
list_not_equal_coords = []

for i in range(0, len(datapd)):
    name = datapd.at[i, 'source_id']
    print("{} {}".format(i, name))
    # Rows with a NaN 'latname' skip the lat/lon completeness checks.
    has_latlon = not pd.isna(datapd.at[i, 'latname'])

    # ---- sea-ice concentration file ------------------------------------
    data_path = '/mnt/d/CMIP6/siconc_SImon_' + name + '_piControl_' + datapd.at[i, 'member_id'] + '*' + datapd.at[i, 'grid_label'] + '*' + '.nc'
    if name == 'NorESM2-LM' or name == 'NorESM2-MM':
        # These two models are matched without the grid label in the pattern.
        data_path = '/mnt/d/CMIP6/siconc_SImon_' + name + '_piControl_' + datapd.at[i, 'member_id'] + '*' + '.nc'
    # Sorted so that the file that gets inspected is the same on every run.
    matching_files = sorted(glob.glob(data_path))
    if len(matching_files) == 0:
        # Previously this case crashed with IndexError on matching_files[0].
        print("    [x] No siconc data.")
        continue
    # Only the first file is opened: the check needs names and shapes, not data.
    with xr.open_mfdataset(matching_files[0], use_cftime=True) as ds:
        siconc_shape = ds.siconc.shape
        print("    Variables: {}".format(list(ds.variables)))
        print("    Dimensions (siconc): {} : {}".format(ds.siconc.dims, siconc_shape))
        if has_latlon:
            if name == 'NESM3':
                dlat = ds.lat.load()
                dlon = ds.lon.load()
            else:
                dlat = ds[datapd.at[i, 'latname']].load()
                dlon = ds[datapd.at[i, 'lonname']].load()
            # Values outside the valid range become NaN; any NaN marks the
            # coordinate array as incomplete.
            dlat = dlat.where(dlat <= 90).where(dlat>=-90)
            dlon = dlon.where(dlon <= 360).where(dlon>=-360)
            if np.isnan(dlat).any() or np.isnan(dlon).any():
                print("        Not complete lat/lon coordinates")
                list_not_complete_latlon.append(name)

    # ---- cell-area file -------------------------------------------------
    grid_path = 'data_areacello/areacello_Ofx_' + name + '_*' + '.nc'
    if name == 'GISS-E2-2-H':
        # Uses the areacello file of GISS-E2-1-H.
        grid_path = 'data_areacello/areacello_Ofx_GISS-E2-1-H_piControl_' + '*' + '.nc'
    elif name == 'UKESM1-1-LL':
        # Uses the areacello file of UKESM1-0-LL.
        grid_path = 'data_areacello/areacello_Ofx_UKESM1-0-LL_piControl_' + '*' + '.nc'
    # Get the list of files matching the search pattern
    matching_gfiles = glob.glob(grid_path)
    if len(matching_gfiles)==0:
        print("    [x] No cell area data.")
        continue
    with xr.open_mfdataset(matching_gfiles) as dsg:
        area_shape = dsg.areacello.shape
        if not has_latlon:
            print("    -> Variables: {}".format(list(dsg.variables)))
        print("    Dimensions (areacello): {} : {}".format(dsg.areacello.dims, area_shape))
        if has_latlon:
            if name == 'NESM3':
                dlat = dsg.lat.load()
                dlon = dsg.lon.load()
            elif name == 'CAS-ESM2-0':
                print("    [!] ice data no lat/lon")
                list_not_complete_latlon_area.append(name)
                continue
            else:
                dlat = dsg[datapd.at[i, 'latname']].load()
                dlon = dsg[datapd.at[i, 'lonname']].load()
            # Looser thresholds than for siconc: only very large values are
            # treated as missing here.
            dlat = dlat.where(dlat < 100)
            dlon = dlon.where(dlon < 500)
            if np.isnan(dlat).any() or np.isnan(dlon).any():
                print("        Not complete lat/lon coordinates")
                list_not_complete_latlon_area.append(name)

    # ---- shape comparison -----------------------------------------------
    if len(area_shape) <2:
        continue
    # siconc is indexed as (time, y, x); areacello as (y, x).
    if siconc_shape[1] != area_shape[0] or siconc_shape[2] != area_shape[1]:
        print("    [!] coords not equal")
        list_not_equal_coords.append(name)

0 BCC-CSM2-MR
    Variables: ['time', 'time_bnds', 'lat', 'lat_bnds', 'lon', 'lon_bnds', 'latitude', 'longitude', 'type', 'siconc']
    Dimensions (siconc): ('time', 'lat', 'lon') : (7200, 232, 360)
    Dimensions (areacello): ('lat', 'lon') : (232, 360)
1 BCC-ESM1
    Variables: ['time', 'time_bnds', 'lat', 'lat_bnds', 'lon', 'lon_bnds', 'latitude', 'longitude', 'type', 'siconc']
    Dimensions (siconc): ('time', 'lat', 'lon') : (5412, 232, 360)
    Dimensions (areacello): ('lat', 'lon') : (232, 360)
2 CAMS-CSM1-0
    Variables: ['time', 'time_bnds', 'j', 'i', 'latitude', 'longitude', 'vertices_latitude', 'vertices_longitude', 'type', 'siconc']
    Dimensions (siconc): ('time', 'j', 'i') : (3000, 200, 360)
    Dimensions (areacello): ('j', 'i') : (200, 360)
3 ACCESS-ESM1-5
    Variables: ['time', 'time_bnds', 'j', 'i', 'latitude', 'longitude', 'vertices_latitude', 'vertices_longitude', 'type', 'siconc']
    Dimensions (siconc): ('time', 'j', 'i') : (6000, 300, 360)
    Dimensions (are

In [4]:
# Hard-coded model lists used by the processing loop further down.
# NOTE(review): presumably copied from the output of the diagnostic loop so
# that loop does not have to be re-run - confirm they are still in sync.

# Models whose siconc lat/lon coordinates are incomplete.
list_not_complete_latlon = [
    'CAS-ESM2-0',
    'CIESM',
    'TaiESM1',
    'CESM2',
    'CESM2-FV2',
    'CESM2-WACCM',
    'CESM2-WACCM-FV2',
    'SAM0-UNICON',
    'NorESM2-MM',
    'NorESM2-LM',
]

# Models whose areacello lat/lon coordinates are incomplete as well.
list_not_complete_latlon_area = [
    'CAS-ESM2-0',
    'CESM2-FV2',
]

# Models whose siconc and areacello grids have different shapes.
list_not_equal_coords = [
    'NESM3',
    'CMCC-CM2-SR5',
    'CMCC-ESM2',
    'NorESM2-MM',
    'NorESM2-LM',
]

Some exceptions (models whose `siconc` and `areacello` grids have different shapes):

8 NESM3
 
    Dimensions (siconc): ('time', 'nj', 'ni') : (1200, 384, 320)
    
    Dimensions (areacello): ('nj', 'ni') : (292, 362)

12 CMCC-CM2-SR5
    
    Dimensions (siconc): ('time', 'i', 'j') : (3000, 291, 360)
    
    Dimensions (areacello): ('i', 'j') : (292, 362)

13 CMCC-ESM2
    
    Dimensions (siconc): ('time', 'i', 'j') : (3000, 291, 360)
    
    Dimensions (areacello): ('i', 'j') : (292, 362)

46 NorESM2-MM
    
    Dimensions (siconc): ('time', 'j', 'i') : (120, 384, 360)

    Dimensions (areacello): ('j', 'i') : (385, 360)

47 NorESM2-LM
    
    Dimensions (siconc): ('time', 'j', 'i') : (120, 384, 360)
    
    Dimensions (areacello): ('j', 'i') : (385, 360)

In [5]:
def ispickleexists(n, p0):
    """Tell whether the pickle file for ``n`` is present.

    The path checked is ``p0 + n + '.pickle'``; ``p0`` is a plain string
    prefix, so it has to end with a path separator if it names a directory.

    Returns True when that path exists, False otherwise.
    """
    return os.path.exists(p0 + n + '.pickle')

def openpickle(n, p0):
    """Load and return the object stored in ``p0 + n + '.pickle'``.

    ``p0`` is a plain string prefix (no separator is inserted).
    """
    # NOTE: unpickling can run arbitrary code; only read files this notebook
    # (or another trusted source) has written.
    with open(p0 + n + '.pickle', 'rb') as handle:
        return pickle.load(handle)

def savepickle(n, p0, sf):
    """Pickle ``sf`` to ``p0 + n + '.pickle'`` with the highest protocol.

    ``p0`` is a plain string prefix (no separator is inserted). An existing
    file at that path is overwritten. Returns None.
    """
    target = p0 + n + '.pickle'
    with open(target, 'wb') as handle:
        pickle.dump(sf, handle, protocol=pickle.HIGHEST_PROTOCOL)

def get_south(da, datapd, i, southlat=-40, newlatname=0):
    """Return the part of ``da`` lying at or south of ``southlat``.

    Parameters
    ----------
    da : array object supporting ``.sel`` / ``.where`` and ``da[name]``
    datapd : table with 'latname' and 'yname' columns
    i : row label in ``datapd`` describing ``da``
    southlat : northern limit of the selection (default -40)
    newlatname : optional latitude coordinate name that overrides the
        'latname' entry; any falsy value means "use the table"

    If the row's 'latname' is NaN, the selection is a label slice from -90 to
    ``southlat`` along the row's 'yname' dimension. Otherwise the latitude
    coordinate is loaded, values outside [-90, 90] are masked, and ``da`` is
    filtered with ``where(..., drop=True)``.
    """
    table_latname = datapd.at[i, 'latname']
    if pd.isna(table_latname):
        # No separate latitude coordinate: slice along the y dimension itself.
        return da.sel({datapd.at[i, 'yname']: slice(-90, southlat)})

    lat_key = newlatname if newlatname else table_latname
    raw_lat = da[lat_key].load()
    # Out-of-range latitudes become NaN and therefore never pass the test below.
    valid_lat = raw_lat.where((raw_lat <= 90) & (raw_lat >= -90))
    return da.where(valid_lat <= southlat, drop=True)

def get_sepsouth(mf, datapd, i, southlat = -40):
    """Open ``mf`` and return its September ``siconc`` south of ``southlat``.

    ``mf`` is passed to ``xr.open_mfdataset`` (with ``use_cftime=True``).
    A 'type' coordinate, when present, is dropped. Time steps are kept when
    the month of their time value is 9, and the southern selection is
    delegated to ``get_south`` using row ``i`` of ``datapd``.

    The dataset is not closed here; the returned array still refers to it.
    """
    siconc = xr.open_mfdataset(mf, use_cftime=True).siconc
    if 'type' in siconc.coords:
        siconc = siconc.reset_coords('type', drop = True)
    is_september = siconc.time.dt.month == 9
    september = siconc.isel(time=is_september)
    return get_south(september, datapd, i, southlat)

def openicenc(p0, datapd, i, southlat = -40):
    """Load the September ``siconc`` south of ``southlat`` for row ``i``.

    Parameters
    ----------
    p0 : str
        Prefix of the file-name pattern (directory plus leading file name part).
    datapd : table with 'source_id', 'member_id' and 'grid_label' columns
    i : row label in ``datapd``
    southlat : northern limit handed to ``get_sepsouth`` (default -40)

    Returns
    -------
    The loaded (in-memory) array returned by ``get_sepsouth``. When more than
    200 files match, each file is processed on its own and the pieces are
    concatenated along "time".

    Raises
    ------
    FileNotFoundError
        If no file matches the pattern.
    """
    source_id = datapd.at[i, 'source_id']
    member_id = datapd.at[i, 'member_id']
    if source_id == 'NorESM2-LM' or source_id == 'NorESM2-MM':
        # These two models are matched without the grid label in the pattern.
        data_path = p0 + source_id + '_piControl_' + member_id + '*' + '.nc'
    else:
        data_path = p0 + source_id + '_piControl_' + member_id + '*' + datapd.at[i, 'grid_label'] + '*' + '.nc'

    # glob returns files in arbitrary order; sort so that the per-file
    # concatenation below follows the file-name order.
    matching_files = sorted(glob.glob(data_path))
    if not matching_files:
        raise FileNotFoundError("no siconc file matches {!r}".format(data_path))

    if len(matching_files)>200:
        # Many files: reduce each one separately, then concatenate once
        # instead of growing the result pairwise inside the loop.
        pieces = [
            get_sepsouth(one_file, datapd, i, southlat = southlat).load()
            for one_file in matching_files
        ]
        return xr.concat(pieces, dim="time")

    return get_sepsouth(matching_files, datapd, i, southlat = southlat).load()

def get_new_xy(d, datapd, j, newlonlat = 0):
    """Return ``(newlon, newlat)`` 2-D longitude / latitude fields for ``d``.

    Parameters
    ----------
    d : mapping-like data object; ``d[name]`` returns a coordinate
    datapd : table with 'latname', 'lonname', 'xname', 'yname' columns
    j : row label in ``datapd`` describing ``d``
    newlonlat : optional ``[lon_name, lat_name]`` pair that overrides the
        table; any falsy value means "use the table"

    Three cases:
      * ``newlonlat`` given: the two named entries of ``d`` are used.
      * row 'latname' is NaN: the 1-D 'xname' / 'yname' coordinates are
        expanded with ``np.meshgrid`` into (y, x) arrays.
      * otherwise: the 'lonname' / 'latname' entries of ``d`` are used.

    If the latitude field has more than two dimensions, the first "time"
    step of both fields is returned.
    """
    if newlonlat:
        newlon = d[newlonlat[0]]
        newlat = d[newlonlat[1]]
    elif pd.isna(datapd.at[j, 'latname']):
        xname = datapd.at[j, 'xname']
        yname = datapd.at[j, 'yname']
        pltx0 = d[xname]
        plty0 = d[yname]
        newlon0, newlat0 = np.meshgrid(pltx0, plty0)
        # dims takes the dimension names only; the coordinate values go in
        # coords. (Passing a {name: values} dict as dims keeps just the keys
        # and silently discards the values.)
        grid_dims = (yname, xname)
        grid_coords = {yname: plty0.values, xname: pltx0.values}
        newlon = xr.DataArray(newlon0, dims=grid_dims, coords=grid_coords)
        newlat = xr.DataArray(newlat0, dims=grid_dims, coords=grid_coords)
    else:
        newlon = d[datapd.at[j, 'lonname']]
        newlat = d[datapd.at[j, 'latname']]
    if len(np.shape(newlat)) > 2:
        newlon = newlon.isel(time = 0)
        newlat = newlat.isel(time = 0)
    return newlon, newlat

def copy_coords(icedata, areadata, datapd, i):
    """Return ``areadata`` with its lat/lon coordinates taken from ``icedata``.

    The coordinate names are the 'latname' and 'lonname' entries of row ``i``
    in ``datapd``; both are read from ``icedata`` and assigned to
    ``areadata`` via ``assign_coords``.
    """
    lat_key = datapd.at[i, 'latname']
    lon_key = datapd.at[i, 'lonname']
    replacement = {
        lat_key: icedata[lat_key],
        lon_key: icedata[lon_key],
    }
    return areadata.assign_coords(replacement)

def rename_xy(data_copyfrom, data_copyto):
    """Rename the last two dimensions of ``data_copyto`` after ``data_copyfrom``.

    The last dimension of ``data_copyto`` gets the name of the last dimension
    of ``data_copyfrom``, and likewise for the second-to-last one. The renamed
    object returned by ``data_copyto.rename`` is handed back.
    """
    source_dims = data_copyfrom.dims
    target_dims = data_copyto.dims
    mapping = {}
    # offset 1 -> last dimension, offset 2 -> second-to-last dimension
    for offset in (1, 2):
        old_name = target_dims[len(target_dims) - offset]
        mapping[old_name] = source_dims[len(source_dims) - offset]
    return data_copyto.rename(mapping)

def copy_coords_xy(data_copyfrom, data_copyto, changename = False):
    """Overwrite the last two dimension coordinates of ``data_copyto``.

    The values come from the last two dimension coordinates of
    ``data_copyfrom`` (matched by position, last with last). With
    ``changename=True`` the two dimensions of ``data_copyto`` are first
    renamed with ``rename_xy``.

    The coordinates are written by item assignment on ``data_copyto``, and
    that same object is returned.
    """
    if changename:
        data_copyto = rename_xy(data_copyfrom, data_copyto)
    # offset 1 -> last dimension, offset 2 -> second-to-last dimension
    for offset in (1, 2):
        target_dim = data_copyto.dims[len(data_copyto.dims) - offset]
        source_dim = data_copyfrom.dims[len(data_copyfrom.dims) - offset]
        data_copyto[target_dim] = data_copyfrom[source_dim].values
    return data_copyto

def newxy_fmissingxy(dx, dy):
    """Fill missing entries of 2-D x / y coordinate fields.

    Values of ``dx`` outside (-361, 361) and of ``dy`` outside (-91, 91) are
    treated as missing. The first row of ``dx`` without missing values and the
    first column of ``dy`` without missing values are expanded with
    ``np.meshgrid`` and used to fill the gaps.

    Returns the filled ``(x, y)`` pair as produced by ``np.where``.
    """
    dx = dx.where((dx>-361) & (dx<361))
    dy = dy.where((dy>-91) & (dy<91))

    x_missing = np.isnan(dx)
    y_missing = np.isnan(dy)

    # Template x values: first row that is complete along axis 1.
    template_x = dx[~x_missing.any(axis=1)][0]
    # Template y values: first column that is complete along axis 0.
    template_y = dy[:, ~y_missing.any(axis=0)][:, 0]

    grid_x, grid_y = np.meshgrid(template_x, template_y)
    filled_x = np.where(x_missing, grid_x, dx)
    filled_y = np.where(y_missing, grid_y, dy)
    return filled_x, filled_y

def replace_missingxg(d, datapd, j, newlonlat = 0):
    """Return ``d`` with gaps in its lon/lat coordinates filled in.

    Parameters
    ----------
    d : data object supporting ``d[name]`` and ``assign_coords``
    datapd : table with 'lonname' and 'latname' columns
    j : row label in ``datapd`` describing ``d``
    newlonlat : optional ``[lon_name, lat_name]`` pair naming the coordinates
        to read; any falsy value means "use the table"

    The coordinates are read (first "time" step only, if the latitude has a
    'time' dimension), repaired with ``newxy_fmissingxy`` and written back
    under the table's 'lonname' / 'latname' - also when they were read from
    the names in ``newlonlat``.
    """
    if newlonlat:
        lon_source, lat_source = newlonlat[0], newlonlat[1]
    else:
        lon_source, lat_source = datapd.at[j, 'lonname'], datapd.at[j, 'latname']
    lon = d[lon_source]
    lat = d[lat_source]

    if 'time' in lat.dims:
        lon = lon.isel(time = 0)
        lat = lat.isel(time = 0)

    filled_lon, filled_lat = newxy_fmissingxy(lon, lat)
    replacement = {
        datapd.at[j, 'lonname']: (lon.dims, filled_lon),
        datapd.at[j, 'latname']: (lat.dims, filled_lat),
    }
    return d.assign_coords(replacement)

def calculate_area_xy(icedata):
    """Build a cell-area array for ``icedata`` from its dimension coordinates.

    ``icedata`` is indexed as (time, y, x); the coordinates of dimensions 1
    and 2 are used as the y and x positions passed to ``Geod.inv`` (as the
    latitude and longitude arguments respectively).

    For every cell, ``dx`` is the ``Geod(ellps='sphere')`` distance to the
    next x position in the same row (the last column wraps around to the
    first), and ``dy`` is the distance to the next y position in the same
    column (the last row repeats the row before it). The area is ``dx * dy``.

    Returns an ``xr.DataArray`` with dimensions ``icedata.dims[1:]`` and the
    second and third coordinates of ``icedata`` attached.
    """
    g = Geod(ellps='sphere')

    y = np.asarray(icedata[icedata.dims[1]].values, dtype=float)
    x = np.asarray(icedata[icedata.dims[2]].values, dtype=float)

    dx = np.full((icedata.shape[1], icedata.shape[2]), np.nan)
    dy = np.full((icedata.shape[1], icedata.shape[2]), np.nan)

    # Geod.inv accepts arrays, so each row is computed in one call instead of
    # one call per grid cell.
    x_next = np.roll(x, -1)  # neighbour to the right; last column wraps to x[0]
    for j in range(len(y)):
        row_lat = np.full(len(x), y[j])
        _, _, dx[j, :] = g.inv(x, row_lat, x_next, row_lat)

    for j in range(len(y)-1):
        this_lat = np.full(len(x), y[j])
        next_lat = np.full(len(x), y[j+1])
        _, _, dy[j, :] = g.inv(x, this_lat, x, next_lat)
    # No row beyond the last one: reuse the spacing of the row before it.
    dy[-1, :] = dy[-2, :]

    # NOTE(review): the coordinates are picked by their position in
    # icedata.coords (entries 1 and 2), not by dimension name.
    coord_names = list(icedata.coords)
    areadata = xr.DataArray(
        data = dx*dy,
        dims=icedata.dims[1:],
        coords={coord_names[1]: icedata[coord_names[1]], coord_names[2]: icedata[coord_names[2]]}
    )
    return areadata

def match_unmatching_grid(icedata, areadata):
    """Return ``template + areadata`` where ``template`` lies on the ice grid.

    ``template`` is an all-NaN array with the shape and dimensions of
    ``icedata`` without its first dimension, carrying the second and third
    coordinates of ``icedata``.

    NOTE(review): every element of the template is NaN, and NaN plus any
    number is NaN, so the sum holds no area values wherever the two arrays
    overlap. Confirm that this is the intended result.
    """
    coord_names = list(icedata.coords)
    template = xr.DataArray(
        data=np.full(icedata.shape[1:], np.nan),
        dims=icedata.dims[1:],
        coords={
            coord_names[1]: icedata[coord_names[1]],
            coord_names[2]: icedata[coord_names[2]],
        },
    )
    return template + areadata

def set_nan_to_zero(icedata):
    """Replace part of the NaN cells of ``icedata`` by 0 (used for E3SM-2-0).

    A cell of the result holds the NaN-filled-with-0 value when either
      * the original value is >= 0, or
      * the filled value is 0 and the cell's coordinate along dimension 1 is
        at or above ``idxmin`` of the original data along that dimension
        (the original author's comment calls this the ice extent edge);
    every other cell is NaN.
    """
    y_dim = icedata.dims[1]
    filled = icedata.fillna(0)
    # Coordinate, along y, of the minimum of the original data.
    edge = icedata.idxmin(y_dim)
    at_or_above_edge = filled.where(filled[y_dim]>=edge)
    zero_at_or_above_edge = at_or_above_edge.where(filled == 0)
    keep = (zero_at_or_above_edge>=0)|(icedata>=0)
    return xr.where(keep, filled, np.nan)

def set_zero_to_nan(icedata):
    """Replace part of the 0 cells of ``icedata`` by NaN (used for GISS, INM-CM4-8).

    A cell of the result keeps its original value when either
      * the original value is > 0, or
      * the original value is 0 and the cell's coordinate along the
        second-to-last dimension is at or above ``idxmin`` of the positive
        values along that dimension (the original author's comment calls
        this the ice extent edge);
    every other cell is NaN.
    """
    positive = icedata.where(icedata>0)
    # Coordinate, along y, of the smallest positive value.
    edge = positive.idxmin(positive.dims[len(positive.dims)-2])
    y_dim = icedata.dims[len(icedata.dims)-2]
    at_or_above_edge = icedata.where(icedata[y_dim]>=edge)
    zero_at_or_above_edge = at_or_above_edge.where(icedata == 0)
    keep = (zero_at_or_above_edge>=0)|(positive>=0)
    return xr.where(keep, icedata, np.nan)

def modify_area_grid_to_ice_grid(icedata, areadata):
    """Move the first column of ``areadata`` to the end and adopt the ice grid.

    Original author's note: only for 'CAS-ESM2-0', where the area longitudes
    run 0~359 and the ice longitudes run 1~360.

    Steps: take the entry at position 0 of dimension 1, relabel it as
    (last coordinate value + 1), append it after the entries selected by the
    label slice ``1..end``, then copy names and coordinate values of the last
    two dimensions from ``icedata`` via ``copy_coords_xy(..., changename=True)``.

    NOTE(review): the relabelling writes to the literal name 'lon', while the
    rest of the function uses ``areadata.dims[1]``; this only lines up when
    that dimension is called 'lon'.
    """
    lon_dim = areadata.dims[1]
    first_column = areadata.isel({lon_dim: 0})
    first_column['lon'] = areadata[lon_dim][-1].values+1
    # Label-based selection: keeps the entries whose coordinate is >= 1.
    remaining = areadata.sel({lon_dim: slice(1, None)})
    shifted = xr.concat([remaining, first_column], dim=lon_dim)
    return copy_coords_xy(icedata, shifted, changename=True)


def flip_y(ds):
    """Reverse ``ds`` along its second-to-last dimension (used for MPI models).

    The data are reindexed with the reversed coordinate of that dimension,
    and the coordinate is then replaced by the integers 0 .. n-1.
    """
    y_dim = ds.dims[len(ds.dims)-2]
    reversed_y = np.flip(ds[y_dim])
    flipped = ds.reindex({y_dim: reversed_y})

    # Dimension name re-read from the reindexed object, as the original did.
    y_after = flipped.dims[len(flipped.dims)-2]
    fresh_index = range(0, len(flipped[y_after]))
    return flipped.assign_coords({y_after: fresh_index})


    

In [16]:
# Main processing loop: for every row of `datapd`, combine the September
# sea-ice concentration south of 40S with the matching cell areas and 2-D
# lon/lat fields into one xr.Dataset, and pickle it to 'data_siconc_w_area/'.
# The model-specific branches below reconcile grids whose coordinates or
# shapes differ between the siconc and areacello files.
for i in range(0, len(datapd)):
    name = datapd.at[i, 'source_id']
    print("{} {}".format(i, name))
    # Rows without an 'xname' entry are skipped.
    if pd.isna(datapd.at[i, 'xname']):
        print("    [x] doesn't have regular grid.")
        continue
    # Skip models whose final product is already cached.
    if ispickleexists(name, 'data_siconc_w_area/'):
        print("    [o] data exist.")
        continue
    # Reuse the cached ice data when present; otherwise read the netCDF
    # files and cache the result for the next run.
    if ispickleexists(name, 'data_siconc/'):
        icedata = openpickle(name, 'data_siconc/')
    else:
        icedata = openicenc('/mnt/d/CMIP6/siconc_SImon_', datapd, i)
        savepickle(name, 'data_siconc/', icedata)
        print("    [*] ice data saved.")
    
    # Locate the cell-area file; two models use the file of another model.
    grid_path = 'data_areacello/areacello_Ofx_' + name + '_*' + '.nc'
    if name == 'GISS-E2-2-H':
        grid_path = 'data_areacello/areacello_Ofx_GISS-E2-1-H_piControl_' + '*' + '.nc'
    elif name == 'UKESM1-1-LL':
        grid_path = 'data_areacello/areacello_Ofx_UKESM1-0-LL_piControl_' + '*' + '.nc'
    # Get the list of files matching the search pattern
    # NOTE(review): an empty match list is not handled here, and this call
    # sits outside the try/except at the end of the loop body.
    matching_gfiles = glob.glob(grid_path)
    dsg = xr.open_mfdataset(matching_gfiles)

    # Southern part of the area field; for two models the latitude
    # coordinate is read from 'lat' instead of the table's 'latname'.
    if name in ['NESM3', 'CAS-ESM2-0']:
        areadata = get_south(dsg.areacello, datapd, i, southlat = -40, newlatname='lat')
    else:
        areadata = get_south(dsg.areacello, datapd, i, southlat = -40)

    # Models whose area grid does not have the shape of the ice grid
    # (plus KIOST-ESM, which is handled the same way as NESM3).
    if (name in list_not_equal_coords) or (name in ['KIOST-ESM']):
        if name in ['CMCC-CM2-SR5', 'CMCC-ESM2']:
            # Drop the first and last entry along j, then take the ice
            # grid's dimension coordinate values.
            areadata = areadata.isel(j = slice(1, len(areadata.j)-1))
            areadata = copy_coords_xy(icedata, areadata)
        elif name in ['NorESM2-MM', 'NorESM2-LM']:
            areadata = match_unmatching_grid(icedata, areadata)
        else:
            # Remaining cases: discard the file's areas and compute them
            # from the ice grid coordinates.
            areadata = calculate_area_xy(icedata)
    # Models whose ice lat/lon coordinates are incomplete.
    if name in list_not_complete_latlon:
        if name not in list_not_complete_latlon_area:
            # The area file has complete lat/lon: align dimension names,
            # then copy lat/lon from the area data onto the ice data.
            if icedata.dims[1:] != areadata.dims[:]:
                areadata = areadata.rename({areadata.dims[0]:icedata.dims[1], areadata.dims[1]:icedata.dims[2]})
            if name in ['TaiESM1']:
                icedata = copy_coords_xy(areadata, icedata)
            icedata = copy_coords(areadata, icedata, datapd, i)
        else:
            # Neither file has complete lat/lon: repair the ice
            # coordinates from their own valid rows / columns.
            icedata = replace_missingxg(icedata, datapd, i)
            if name == 'CESM2-FV2':
                areadata = copy_coords_xy(icedata, areadata)
                icedata = copy_coords_xy(areadata, icedata, changename=True)
                areadata = copy_coords(icedata, areadata, datapd, i)
            elif name == 'CAS-ESM2-0':
                areadata = copy_coords_xy(icedata, areadata, changename=True)
    elif name in ['ACCESS-ESM1-5', 'ACCESS-CM2', 'NorCPM1']: 
        # Area data takes lat/lon from the ice data.
        areadata = copy_coords(icedata, areadata, datapd, i)
    elif name in ['GISS-E2-1-H', 'GISS-E2-2-H']:
        # Area data takes the dimension coordinate values of the ice data.
        areadata = copy_coords_xy(icedata, areadata)
    elif name in ['FGOALS-g3']:
        # latitude / longitude carry a time dimension here: keep the first
        # step only, and reverse the area data along j to match the ice data.
        icedata = icedata.assign_coords(
            {
                'latitude':icedata.latitude.isel(time = 0).reset_coords('time', drop = True), 
                'longitude':icedata.longitude.isel(time = 0).reset_coords('time', drop = True)
            }
        )
        areadata = areadata.reindex(j=list(reversed(areadata.j)))
        areadata = areadata.assign_coords({'j':icedata.j})

    # Model-specific clean-up of the ice values / orientation.
    if name in ['E3SM-2-0', 'E3SM-2-0-NARRM']:
        icedata = set_nan_to_zero(icedata)
    elif name in ['GISS-E2-1-H', 'GISS-E2-2-H', 'INM-CM4-8']:
        icedata = set_zero_to_nan(icedata)
    elif name in ['MPI-ESM-1-2-HAM', 'MPI-ESM1-2-HR', 'MPI-ESM1-2-LR']:
        icedata = flip_y(icedata)
        areadata = flip_y(areadata)

    # 2-D lon/lat fields stored next to the data.
    if name in ['NESM3']:
        newlon, newlat = get_new_xy(icedata, datapd, i, newlonlat=['lon', 'lat'])
    else: 
        newlon, newlat = get_new_xy(icedata, datapd, i)

    # Assemble and cache the final product.
    # NOTE(review): any exception raised here is only printed; the loop moves
    # on to the next model and no pickle is written for this one.
    try:
        new_ds = xr.Dataset(
            data_vars={
                'siconc':icedata.load(), 
                'areacello':areadata.load(), 
                'newlat':newlat.load(),
                'newlon':newlon.load()
            }
        )
        savepickle(name, 'data_siconc_w_area/', new_ds)
        print("    [*] complete!")
    except Exception as error:
        print("    An exception occurred:", error) # report the failure and continue with the next model


0 BCC-CSM2-MR
    [o] data exist.
1 BCC-ESM1
    [o] data exist.
2 CAMS-CSM1-0
    [o] data exist.
3 ACCESS-ESM1-5
    [o] data exist.
4 ACCESS-CM2
    [o] data exist.
5 GFDL-CM4
    [o] data exist.
6 GFDL-ESM4
    [o] data exist.
7 KIOST-ESM
    [o] data exist.
8 NESM3
    [o] data exist.
9 CanESM5
    [o] data exist.
10 CanESM5-1
    [o] data exist.
11 CanESM5-CanOE
    [o] data exist.
12 CMCC-CM2-SR5
    [o] data exist.
13 CMCC-ESM2
    [o] data exist.
14 EC-Earth3
    [o] data exist.
15 EC-Earth3-CC
    [o] data exist.
16 EC-Earth3-LR
    [o] data exist.
17 EC-Earth3-Veg
    [o] data exist.
18 EC-Earth3-Veg-LR
    [o] data exist.
19 EC-Earth3-AerChem
    [o] data exist.
20 CNRM-CM6-1
    [o] data exist.
21 CNRM-CM6-1-HR
    [o] data exist.
22 CNRM-ESM2-1
    [o] data exist.
23 HadGEM3-GC31-LL
    [o] data exist.
24 HadGEM3-GC31-MM
    [o] data exist.
25 UKESM1-0-LL
    [o] data exist.
26 UKESM1-1-LL
    [o] data exist.
27 IPSL-CM5A2-INCA
    [o] data exist.
28 IPSL-CM6A-LR
    [o] 

! Note (why some models are not chosen)

'NorESM1-F': no temp&salt data, no mlotst on CMIP6 site

'INM-CM5-0': has temp data but no salt data (only global salt) and no mlotst data

'AWI-CM-1-1-MR': no monthly siconc data

'FIO-ESM-2-0': no salt data

--

other note

'IPSL-CM6A-MR1': has area data in thetao
