In [1]:
from xmip.preprocessing import combined_preprocessing
from xmip.utils import google_cmip_col
import matplotlib.pyplot as plt
import xarray as xr

In [2]:
cmip6 = google_cmip_col()

In [96]:
models = [
 'IPSL-CM6A-LR',
 'MIROC6',
 'SAM0-UNICON',
 'CanESM5',
 'MPI-ESM-1-2-HAM',
 'MPI-ESM1-2-HR',
 'MPI-ESM1-2-LR',
 'GFDL-ESM4',
 'NorESM2-LM',
 'MRI-ESM2-0',
 'NorESM2-MM',
 'FIO-ESM-2-0',
 'CMCC-CM2-SR5',
 'EC-Earth3-AerChem',
 'TaiESM1',
 'NorCPM1',
 'IPSL-CM5A2-INCA',
 'ACCESS-ESM1-5',
 'ACCESS-CM2',
 'CMCC-CM2-HR4',
 'EC-Earth3',
 'EC-Earth3-Veg-LR',
 'CAS-ESM2-0',
 'EC-Earth3-CC',
 'CMCC-ESM2',
 'IPSL-CM6A-LR-INCA']

In [63]:
#extract from the catalogue SIC and cell area
cat = cmip6.search(
    source_id=models,
    variable_id='siconc', 
    table_id='SImon',
    experiment_id='historical',
    member_id='r1i1p1f1',
    grid_label='gn'
)
ddict = cat.to_dataset_dict(
    preprocess=combined_preprocessing,
    xarray_open_kwargs={'use_cftime':True},
)


--> The keys in the returned dictionary of datasets are constructed as follows:
	'activity_id.institution_id.source_id.experiment_id.table_id.grid_label'




In [64]:
ddict.keys()

dict_keys(['CMIP.MRI.MRI-ESM2-0.historical.SImon.gn', 'CMIP.CSIRO.ACCESS-ESM1-5.historical.SImon.gn', 'CMIP.CAS.CAS-ESM2-0.historical.SImon.gn', 'CMIP.IPSL.IPSL-CM6A-LR-INCA.historical.SImon.gn', 'CMIP.NCC.NorESM2-LM.historical.SImon.gn', 'CMIP.EC-Earth-Consortium.EC-Earth3-CC.historical.SImon.gn', 'CMIP.IPSL.IPSL-CM6A-LR.historical.SImon.gn', 'CMIP.AS-RCEC.TaiESM1.historical.SImon.gn', 'CMIP.NOAA-GFDL.GFDL-ESM4.historical.SImon.gn', 'CMIP.CAS.FGOALS-g3.historical.SImon.gn', 'CMIP.CCCma.CanESM5.historical.SImon.gn', 'CMIP.NCC.NorESM2-MM.historical.SImon.gn', 'CMIP.EC-Earth-Consortium.EC-Earth3.historical.SImon.gn', 'CMIP.EC-Earth-Consortium.EC-Earth3-AerChem.historical.SImon.gn', 'CMIP.EC-Earth-Consortium.EC-Earth3-Veg-LR.historical.SImon.gn', 'CMIP.NCC.NorCPM1.historical.SImon.gn', 'CMIP.CMCC.CMCC-ESM2.historical.SImon.gn', 'CMIP.SNU.SAM0-UNICON.historical.SImon.gn', 'CMIP.MPI-M.MPI-ESM1-2-HR.historical.SImon.gn', 'CMIP.CMCC.CMCC-CM2-HR4.historical.SImon.gn', 'CMIP.CSIRO-ARCCSS.ACCESS

In [65]:
def findedge(sic):
    # this function is quite a hack and not optimized to work with cloud data, sorry!
    import numpy as np
    from scipy.interpolate import interp1d
    from calendar import month_abbr
    import pandas as pd
  
    N,JM,IM = sic.shape
    lat = sic.lat.values
    lon = sic.lon.values
    edgelat=np.ones([IM])
    edgelon=np.ones([IM])
    # interpolating grid
    ilon = np.arange(0,360,0.5)
    ilat = np.ones([N,len(ilon)])
    
    for t in range(N): # iterate over time
        c = sic.isel(time=t).values
        for i in range(IM):           #iterating over every longitude
            ice=np.where(c[:,i]>0)[0] #find indices where SIC>0
            if ice.size == 0:         #if no ice, make both coords NaN
                edgelat[i]=np.NaN
                edgelon[i]=np.NaN
            else:
                jice=ice[-1]           #otherwise, take the last index and store coords
                edgelat[i]=lat[jice,i]
                edgelon[i]=lon[jice,i]
        f = interp1d(edgelon, edgelat, bounds_error=False, fill_value='extrapolate')
        ilat[t,:] = f(ilon)

    # calculate climatologies and create the dataframe
    months = [month_abbr[i] for i in [1,2,11,12]]
    months_std = [month_abbr[i]+'_std' for i in [1,2,11,12]]
    df = pd.DataFrame(index=ilon,columns=months+months_std)
    df.index.name = 'Longitude'
    jan = ilat[range(0,40,4),:] # extract the columns corresponding to the month
    feb = ilat[range(1,40,4),:]
    nov = ilat[range(2,40,4),:]
    dec = ilat[range(3,41,4),:]
    df['Jan'] = np.median(jan,axis=0)
    df['Jan_std'] = np.std(jan,axis=0)
    df['Feb'] = np.median(feb,axis=0)
    df['Feb_std'] = np.std(feb,axis=0)
    df['Nov'] = np.median(nov,axis=0)
    df['Nov_std'] = np.std(nov,axis=0)
    df['Dec'] = np.median(dec,axis=0)
    df['Dec_std'] = np.std(dec,axis=0)
    return df

In [66]:
# find edge and store the dataframe
for name, ds in ddict.items():
    print(name)
    mname = ds.attrs['source_id']
    sic = ds.siconc.sel(time=slice('1930','1939')).squeeze()
    sic = sic.sel(time=sic.time.dt.month.isin([11,12,1,2])) # extract summer months
    mask = (sic.lat < -40.).compute() # need to carry out the computation because of dask chunks
    sic_sh = sic.where(mask, drop=True)
    df = findedge(sic_sh)
    df.to_csv(mname+'_edge_1930_1939.csv')


CMIP.MRI.MRI-ESM2-0.historical.SImon.gn
CMIP.CSIRO.ACCESS-ESM1-5.historical.SImon.gn
CMIP.CAS.CAS-ESM2-0.historical.SImon.gn
CMIP.IPSL.IPSL-CM6A-LR-INCA.historical.SImon.gn
CMIP.NCC.NorESM2-LM.historical.SImon.gn
CMIP.EC-Earth-Consortium.EC-Earth3-CC.historical.SImon.gn


  slope = (y_hi - y_lo) / (x_hi - x_lo)[:, None]


CMIP.IPSL.IPSL-CM6A-LR.historical.SImon.gn
CMIP.AS-RCEC.TaiESM1.historical.SImon.gn
CMIP.NOAA-GFDL.GFDL-ESM4.historical.SImon.gn
CMIP.CAS.FGOALS-g3.historical.SImon.gn
CMIP.CCCma.CanESM5.historical.SImon.gn
CMIP.NCC.NorESM2-MM.historical.SImon.gn
CMIP.EC-Earth-Consortium.EC-Earth3.historical.SImon.gn


  slope = (y_hi - y_lo) / (x_hi - x_lo)[:, None]


CMIP.EC-Earth-Consortium.EC-Earth3-AerChem.historical.SImon.gn
CMIP.EC-Earth-Consortium.EC-Earth3-Veg-LR.historical.SImon.gn
CMIP.NCC.NorCPM1.historical.SImon.gn
CMIP.CMCC.CMCC-ESM2.historical.SImon.gn
CMIP.SNU.SAM0-UNICON.historical.SImon.gn
CMIP.MPI-M.MPI-ESM1-2-HR.historical.SImon.gn
CMIP.CMCC.CMCC-CM2-HR4.historical.SImon.gn
CMIP.CSIRO-ARCCSS.ACCESS-CM2.historical.SImon.gn
CMIP.IPSL.IPSL-CM5A2-INCA.historical.SImon.gn
CMIP.MIROC.MIROC6.historical.SImon.gn
CMIP.MPI-M.MPI-ESM1-2-LR.historical.SImon.gn
CMIP.HAMMOZ-Consortium.MPI-ESM-1-2-HAM.historical.SImon.gn
CMIP.CMCC.CMCC-CM2-SR5.historical.SImon.gn


In [None]:
# group all the models to calculate statistics for the 4 months
# the longitude index is set to the range -180,180
# model data will be sorted
ilon = np.arange(0,360,0.5)
lon180 = np.where(ilon>180,ilon-360,ilon)
dfj = pd.DataFrame(index=lon180,columns=models)
dfj.index.name = 'Longitude'
dff = dfj.copy()
dfn = dfj.copy()
dfd = dfj.copy()

for m in models:
    df = pd.read_csv(m+'_edge_1930_1939.csv',index_col=0)
    dfj[m] = df['Jan'].values
    dff[m] = df['Feb'].values
    dfn[m] = df['Nov'].values
    dfd[m] = df['Dec'].values

# sort by longitude
dfj.sort_index(inplace=True)
dff.sort_index(inplace=True)
dfn.sort_index(inplace=True)
dfd.sort_index(inplace=True)

In [198]:
dfj.to_csv('CMIP6-26_SIedge_Jan_1930_1939.csv')
dff.to_csv('CMIP6-26_SIedge_Feb_1930_1939.csv')
dfn.to_csv('CMIP6-26_SIedge_Nov_1930_1939.csv')
dfd.to_csv('CMIP6-26_SIedge_Dec_1930_1939.csv')

In [122]:
# plot test figure
Mjan = dfj[best8].median(axis=1)
p75jan = dfj[best8].quantile(0.75,axis=1)
p25jan = dfj[best8].quantile(0.25,axis=1)
plt.figure()
Mjan.plot(label='Jan median')
p75jan.plot(color='C0',linestyle='--',label='75th p')
p25jan.plot(color='C0',linestyle='--',label='25th p')
plt.legend()