# Pre-proj: check bias versus trends in multimodel ensemble
## Get the data and store them on the common 1°x1° grid of the topographic file
conda env: `phd_v3` (in `envs/phd`)

In [1]:
# To reload external files automatically (ex: utils)
%load_ext autoreload
%autoreload 2

import xarray as xr
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import proplot as plot # New plot library (https://proplot.readthedocs.io/en/latest/)
plot.rc['savefig.dpi'] = 300 # 1200 is too big! #https://proplot.readthedocs.io/en/latest/basics.html#Creating-figures
from scipy import stats
import xesmf as xe # For regridding (https://xesmf.readthedocs.io/en/latest/)

import sys
sys.path.insert(1, '/home/mlalande/notebooks/utils') # to include my util file in previous directory
import utils as u # my personal functions
u.check_python_version()
# u.check_virtual_memory()

3.8.5 | packaged by conda-forge | (default, Jul 24 2020, 01:25:15) 
[GCC 7.5.0]


## Set variables

In [2]:
# Historical period to analyse (year labels used to slice the time axis)
period_past = slice('1979','2014')
# Season labels
seasons = ['Annual', 'DJFMA', 'JJAS']

# HMA domain bounds, plus an extended version (5 more on each side, in the units
# of the bounds) so that the regridding behaves properly on the edges
latlim, lonlim = u.get_domain_HMA()
margin = 5
latlim_ext = slice(latlim.start - margin, latlim.stop + margin)
lonlim_ext = slice(lonlim.start - margin, lonlim.stop + margin)

## Load topography

In [3]:
# Open the topography file and make it indexable by 'lat'/'lon' labels
topo_raw = xr.open_dataset('GMTED2010_15n240_1000deg.nc')
topo_latlon = (
    topo_raw
    .drop_dims('nbounds')                                   # remove everything defined on 'nbounds'
    .swap_dims({'nlat': 'latitude', 'nlon': 'longitude'})   # index by coordinate values
    .drop({'nlat', 'nlon'})                                 # drop the former index variables
    .rename({'latitude': 'lat', 'longitude': 'lon'})
)

# Keep only the extended domain; this subset is the target grid of the regridding
ds = topo_latlon.sel(lat=latlim_ext, lon=lonlim_ext)
elevation = ds.elevation
elevation_std = ds.elevation_stddev

## Load models

In [4]:
# Model names of the ensemble, as provided by the personal utils module (displayed below)
list_models = u.get_model_names()
list_models

['BCC-CSM2-MR',
 'BCC-ESM1',
 'CAS-ESM2-0',
 'CESM2',
 'CESM2-FV2',
 'CESM2-WACCM',
 'CESM2-WACCM-FV2',
 'CNRM-CM6-1',
 'CNRM-CM6-1-HR',
 'CNRM-ESM2-1',
 'CanESM5',
 'CanESM5-CanOE',
 'GFDL-CM4',
 'GISS-E2-1-G',
 'GISS-E2-1-H',
 'HadGEM3-GC31-LL',
 'HadGEM3-GC31-MM',
 'IPSL-CM6A-LR',
 'MIROC-ES2L',
 'MIROC6',
 'MPI-ESM1-2-HR',
 'MPI-ESM1-2-LR',
 'MRI-ESM2-0',
 'NorESM2-LM',
 'SAM0-UNICON',
 'TaiESM1',
 'UKESM1-0-LL']

## tas

In [5]:
# Variable processed by the next cell, with its table name and plotting metadata
var = 'tas'
table = u.get_table(var)
(
    label, units, levels, cmap, extend,
    levels_diff, cmap_diff, extend_diff,
    levels_bias, cmap_bias, extend_bias,
) = u.get_var_infos(var)
label

'Near-Surface Air Temperature'

In [5]:
%%time

list_models = u.get_model_names()
list_models = list_models[17:]
# list_models = ['EC-Earth3']

for i, model in enumerate(list_models):
    print(model + ' ('+str(i+1)+'/'+str(len(list_models))+')')
    institute, grid, realization, calendar = u.get_model_infos(model, var)
    
    #################
    ### Load past ###
    #################
    
    realizations = !ls {'/bdd/CMIP6/CMIP/'+institute+'/'+model+'/historical/'}
    temp = []
    no_file = []
    
    for i, realization in enumerate(realizations):
        try:
            da = xr.open_mfdataset(
                '/bdd/CMIP6/CMIP/'+institute+'/'+model+'/historical/'+realization+'/'+table+'/'+var+'/'+grid+'/latest/*.nc', 
                combine='by_coords'
            ).sel(time=period_past, lat=latlim_ext, lon=lonlim_ext)[var]
            
            temp.append(da)
            
        except OSError:
            no_file.append(realization)
            
    for nf in no_file:
        realizations.remove(nf)
        
    print(realizations)
    
    da_past = xr.concat(temp, pd.Index(realizations, name='realization'))
    
    print(da_past.realization.size)
    
    np.testing.assert_equal((int(period_past.stop) - int(period_past.start) + 1)*12, da_past.time.size)
    
    # tas (K -> °C)
    #################################################################################

    da_past -= 273.15
        
    #################################################################################
    
    # Regrid
    regridder = xe.Regridder(da_past, elevation, 'bilinear', periodic=False, reuse_weights=True)
    da_past_regrid = regridder(da_past)
    
    if 'height' in da_past_regrid.coords: da_past_regrid = da_past_regrid.drop('height')
    
    # Store data on CICLAD
    path = '/data/mlalande/CMIP6_HMA_paper'
    da_past_regrid.sel(lat=latlim, lon=lonlim).to_netcdf(path+'/'+var+'/'+var+'_'+table+'_'+model+'_historical_ens_'+grid+'_197901-201412_HMA.nc')
        
    print('\n')

IPSL-CM6A-LR (1/10)
['r10i1p1f1', 'r11i1p1f1', 'r12i1p1f1', 'r13i1p1f1', 'r14i1p1f1', 'r15i1p1f1', 'r16i1p1f1', 'r17i1p1f1', 'r18i1p1f1', 'r19i1p1f1', 'r1i1p1f1', 'r20i1p1f1', 'r21i1p1f1', 'r22i1p1f1', 'r23i1p1f1', 'r24i1p1f1', 'r25i1p1f1', 'r26i1p1f1', 'r27i1p1f1', 'r28i1p1f1', 'r29i1p1f1', 'r2i1p1f1', 'r30i1p1f1', 'r31i1p1f1', 'r32i1p1f1', 'r3i1p1f1', 'r4i1p1f1', 'r5i1p1f1', 'r6i1p1f1', 'r7i1p1f1', 'r8i1p1f1', 'r9i1p1f1']
32
Reuse existing file: bilinear_28x25_35x60.nc


MIROC-ES2L (2/10)
['r10i1p1f2', 'r1i1p1f2', 'r2i1p1f2', 'r3i1p1f2', 'r4i1p1f2', 'r5i1p1f2', 'r6i1p1f2', 'r7i1p1f2', 'r8i1p1f2', 'r9i1p1f2']
10
Reuse existing file: bilinear_13x21_35x60.nc


MIROC6 (3/10)
['r10i1p1f1', 'r11i1p1f1', 'r12i1p1f1', 'r13i1p1f1', 'r14i1p1f1', 'r15i1p1f1', 'r16i1p1f1', 'r17i1p1f1', 'r18i1p1f1', 'r19i1p1f1', 'r1i1p1f1', 'r20i1p1f1', 'r21i1p1f1', 'r22i1p1f1', 'r23i1p1f1', 'r24i1p1f1', 'r25i1p1f1', 'r26i1p1f1', 'r27i1p1f1', 'r28i1p1f1', 'r29i1p1f1', 'r2i1p1f1', 'r30i1p1f1', 'r31i1p1f1', 'r32i1p

## snc

In [6]:
# Variable processed by the next cell, with its table name and plotting metadata
var = 'snc'
table = u.get_table(var)
(
    label, units, levels, cmap, extend,
    levels_diff, cmap_diff, extend_diff,
    levels_bias, cmap_bias, extend_bias,
) = u.get_var_infos(var)
label

'Snow Cover Extent'

In [8]:
%%time

list_models = u.get_model_names()
# list_models = list_models[17:]
# list_models = ['EC-Earth3']

for i, model in enumerate(list_models):
    print(model + ' ('+str(i+1)+'/'+str(len(list_models))+')')
    institute, grid, realization, calendar = u.get_model_infos(model, var)
    
    #################
    ### Load past ###
    #################
    
    realizations = !ls {'/bdd/CMIP6/CMIP/'+institute+'/'+model+'/historical/'}
    temp = []
    no_file = []
    
    for i, realization in enumerate(realizations):
        try:
            da = xr.open_mfdataset(
                '/bdd/CMIP6/CMIP/'+institute+'/'+model+'/historical/'+realization+'/'+table+'/'+var+'/'+grid+'/latest/*.nc', 
                combine='by_coords'
            ).sel(time=period_past, lat=latlim_ext, lon=lonlim_ext)[var]
            
            temp.append(da)
            
        except OSError:
            no_file.append(realization)
            
    for nf in no_file:
        realizations.remove(nf)
        
    print(realizations)
    
    da_past = xr.concat(temp, pd.Index(realizations, name='realization'))
    
    print(da_past.realization.size)
    
    np.testing.assert_equal((int(period_past.stop) - int(period_past.start) + 1)*12, da_past.time.size)
    
    # snc (0-1 to % when needed and take out wrong values)
    #################################################################################
    
    # Some models have values between 0 and 1 even if units in %    
    if da_past.max().values <= 1.1:
        da_past = da_past*100
    
    # Some models have wrong values (+-0.1 for numeric precision)
    da_past = da_past.where( (da_past >= -0.1) & (da_past <= 100.1) )
    
    #################################################################################
    
    # Regrid
    regridder = xe.Regridder(da_past, elevation, 'bilinear', periodic=False, reuse_weights=True)
    da_past_regrid = regridder(da_past)
    
    if 'height' in da_past_regrid.coords: da_past_regrid = da_past_regrid.drop('height')
    
    # Store data on CICLAD
    path = '/data/mlalande/CMIP6_HMA_paper'
    da_past_regrid.sel(lat=latlim, lon=lonlim).to_netcdf(path+'/'+var+'/'+var+'_'+table+'_'+model+'_historical_ens_'+grid+'_197901-201412_HMA.nc')
        
    print('\n')

BCC-CSM2-MR (1/27)
['r1i1p1f1', 'r2i1p1f1', 'r3i1p1f1']
3
Reuse existing file: bilinear_32x54_35x60.nc


BCC-ESM1 (2/27)
['r1i1p1f1', 'r2i1p1f1', 'r3i1p1f1']
3
Reuse existing file: bilinear_13x21_35x60.nc


CAS-ESM2-0 (3/27)
['r1i1p1f1', 'r2i1p1f1', 'r3i1p1f1', 'r4i1p1f1']
4
Reuse existing file: bilinear_24x42_35x60.nc


CESM2 (4/27)


  new_vars[k] = decode_cf_variable(
  new_vars[k] = decode_cf_variable(
  new_vars[k] = decode_cf_variable(
  new_vars[k] = decode_cf_variable(
  new_vars[k] = decode_cf_variable(


['r10i1p1f1', 'r11i1p1f1', 'r1i1p1f1', 'r2i1p1f1', 'r3i1p1f1', 'r4i1p1f1', 'r5i1p1f1', 'r6i1p1f1', 'r7i1p1f1', 'r8i1p1f1', 'r9i1p1f1']
11
Reuse existing file: bilinear_37x49_35x60.nc


CESM2-FV2 (5/27)


  new_vars[k] = decode_cf_variable(
  new_vars[k] = decode_cf_variable(


['r1i1p1f1', 'r2i1p1f1', 'r3i1p1f1']
3


  return func(*args, **kwargs)


Reuse existing file: bilinear_18x25_35x60.nc


CESM2-WACCM (6/27)


  new_vars[k] = decode_cf_variable(


['r1i1p1f1', 'r2i1p1f1', 'r3i1p1f1']
3
Reuse existing file: bilinear_37x49_35x60.nc


CESM2-WACCM-FV2 (7/27)


  new_vars[k] = decode_cf_variable(
  new_vars[k] = decode_cf_variable(
  new_vars[k] = decode_cf_variable(


['r1i1p1f1', 'r2i1p1f1', 'r3i1p1f1']
3


  return func(*args, **kwargs)


Reuse existing file: bilinear_18x25_35x60.nc


CNRM-CM6-1 (8/27)
['r10i1p1f2', 'r11i1p1f2', 'r12i1p1f2', 'r13i1p1f2', 'r14i1p1f2', 'r15i1p1f2', 'r16i1p1f2', 'r17i1p1f2', 'r18i1p1f2', 'r19i1p1f2', 'r1i1p1f2', 'r20i1p1f2', 'r21i1p1f2', 'r22i1p1f2', 'r23i1p1f2', 'r24i1p1f2', 'r25i1p1f2', 'r26i1p1f2', 'r27i1p1f2', 'r28i1p1f2', 'r29i1p1f2', 'r2i1p1f2', 'r30i1p1f2', 'r3i1p1f2', 'r4i1p1f2', 'r5i1p1f2', 'r6i1p1f2', 'r7i1p1f2', 'r8i1p1f2', 'r9i1p1f2']
30
Reuse existing file: bilinear_25x42_35x60.nc


CNRM-CM6-1-HR (9/27)
['r1i1p1f2']
1
Reuse existing file: bilinear_70x121_35x60.nc


CNRM-ESM2-1 (10/27)
['r10i1p1f2', 'r11i1p1f2', 'r1i1p1f2', 'r2i1p1f2', 'r3i1p1f2', 'r4i1p1f2', 'r5i1p1f2', 'r6i1p1f2', 'r8i1p1f2', 'r9i1p1f2']
10
Reuse existing file: bilinear_25x42_35x60.nc


CanESM5 (11/27)
['r10i1p1f1', 'r10i1p2f1', 'r11i1p1f1', 'r11i1p2f1', 'r12i1p1f1', 'r12i1p2f1', 'r13i1p1f1', 'r13i1p2f1', 'r14i1p1f1', 'r14i1p2f1', 'r15i1p1f1', 'r15i1p2f1', 'r16i1p1f1', 'r16i1p2f1', 'r17i1p1f1', 'r17i1p2f1', '

  return func(*args, **kwargs)


Reuse existing file: bilinear_18x24_35x60.nc


GISS-E2-1-H (15/27)
['r10i1p1f1', 'r1i1p1f1', 'r1i1p1f2', 'r1i1p3f1', 'r1i1p5f1', 'r2i1p1f1', 'r2i1p1f2', 'r2i1p3f1', 'r2i1p5f1', 'r3i1p1f1', 'r3i1p1f2', 'r3i1p3f1', 'r3i1p5f1', 'r4i1p1f1', 'r4i1p1f2', 'r4i1p3f1', 'r5i1p1f1', 'r5i1p1f2', 'r5i1p3f1', 'r6i1p1f1', 'r7i1p1f1', 'r8i1p1f1', 'r9i1p1f1']
23
Reuse existing file: bilinear_18x24_35x60.nc


HadGEM3-GC31-LL (16/27)
['r1i1p1f3', 'r2i1p1f3', 'r3i1p1f3', 'r4i1p1f3']
4
Reuse existing file: bilinear_28x32_35x60.nc


HadGEM3-GC31-MM (17/27)
['r1i1p1f3', 'r2i1p1f3', 'r3i1p1f3', 'r4i1p1f3']
4


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


Reuse existing file: bilinear_63x72_35x60.nc


IPSL-CM6A-LR (18/27)
['r10i1p1f1', 'r11i1p1f1', 'r12i1p1f1', 'r13i1p1f1', 'r14i1p1f1', 'r15i1p1f1', 'r16i1p1f1', 'r17i1p1f1', 'r18i1p1f1', 'r19i1p1f1', 'r1i1p1f1', 'r20i1p1f1', 'r21i1p1f1', 'r22i1p1f1', 'r23i1p1f1', 'r24i1p1f1', 'r25i1p1f1', 'r26i1p1f1', 'r27i1p1f1', 'r28i1p1f1', 'r29i1p1f1', 'r2i1p1f1', 'r30i1p1f1', 'r31i1p1f1', 'r32i1p1f1', 'r3i1p1f1', 'r4i1p1f1', 'r5i1p1f1', 'r6i1p1f1', 'r7i1p1f1', 'r8i1p1f1', 'r9i1p1f1']
32
Reuse existing file: bilinear_28x25_35x60.nc


MIROC-ES2L (19/27)
['r10i1p1f2', 'r1i1p1f2', 'r2i1p1f2', 'r3i1p1f2', 'r4i1p1f2', 'r5i1p1f2', 'r6i1p1f2', 'r7i1p1f2', 'r8i1p1f2', 'r9i1p1f2']
10
Reuse existing file: bilinear_13x21_35x60.nc


MIROC6 (20/27)
['r10i1p1f1', 'r11i1p1f1', 'r12i1p1f1', 'r13i1p1f1', 'r14i1p1f1', 'r15i1p1f1', 'r16i1p1f1', 'r17i1p1f1', 'r18i1p1f1', 'r19i1p1f1', 'r1i1p1f1', 'r20i1p1f1', 'r21i1p1f1', 'r22i1p1f1', 'r23i1p1f1', 'r24i1p1f1', 'r25i1p1f1', 'r26i1p1f1', 'r27i1p1f1', 'r28i1p1f1', 'r29i1p1

  return func(*args, **kwargs)


Reuse existing file: bilinear_28x32_35x60.nc


CPU times: user 3min 17s, sys: 42.6 s, total: 3min 59s
Wall time: 20min 46s


## pr

In [14]:
# Variable processed by the next cell, with its table name and plotting metadata
var = 'pr'
table = u.get_table(var)
(
    label, units, levels, cmap, extend,
    levels_diff, cmap_diff, extend_diff,
    levels_bias, cmap_bias, extend_bias,
) = u.get_var_infos(var)
label

'Total Precipitation'

In [17]:
%%time

list_models = u.get_model_names()
list_models = list_models[3:]
# list_models = ['EC-Earth3']

for i, model in enumerate(list_models):
    print(model + ' ('+str(i+1)+'/'+str(len(list_models))+')')
    institute, grid, realization, calendar = u.get_model_infos(model, var)
    
    #################
    ### Load past ###
    #################
    
    realizations = !ls {'/bdd/CMIP6/CMIP/'+institute+'/'+model+'/historical/'}
    temp = []
    no_file = []
    
    for i, realization in enumerate(realizations):
        try:
            if model == 'CESM2' and realization == 'r9i1p1f1':
                da = xr.open_mfdataset(
                    '/bdd/CMIP6/CMIP/'+institute+'/'+model+'/historical/'+realization+'/'+table+'/'+var+'/'+grid+'/latest/*185001-201412.nc', 
                    combine='by_coords'
                ).sel(time=period_past, lat=latlim_ext, lon=lonlim_ext)[var]
            else:
                da = xr.open_mfdataset(
                    '/bdd/CMIP6/CMIP/'+institute+'/'+model+'/historical/'+realization+'/'+table+'/'+var+'/'+grid+'/latest/*.nc', 
                    combine='by_coords'
                ).sel(time=period_past, lat=latlim_ext, lon=lonlim_ext)[var]
            
            temp.append(da)
            
        except OSError:
            no_file.append(realization)
            
    for nf in no_file:
        realizations.remove(nf)
        
    print(realizations)
    
    da_past = xr.concat(temp, pd.Index(realizations, name='realization'))
    
    print(da_past.realization.size)
    
    np.testing.assert_equal((int(period_past.stop) - int(period_past.start) + 1)*12, da_past.time.size)
    
    # pr (mm/s -> mm/day)
    #################################################################################
    
    da_past = da_past * 86400
    
    #################################################################################
    
    # Regrid
    regridder = xe.Regridder(da_past, elevation, 'bilinear', periodic=False, reuse_weights=True)
    da_past_regrid = regridder(da_past)
    
    if 'height' in da_past_regrid.coords: da_past_regrid = da_past_regrid.drop('height')
    
    # Store data on CICLAD
    path = '/data/mlalande/CMIP6_HMA_paper'
    da_past_regrid.sel(lat=latlim, lon=lonlim).to_netcdf(path+'/'+var+'/'+var+'_'+table+'_'+model+'_historical_ens_'+grid+'_197901-201412_HMA.nc')
        
    print('\n')

CESM2 (1/24)


  new_vars[k] = decode_cf_variable(
  new_vars[k] = decode_cf_variable(
  new_vars[k] = decode_cf_variable(
  new_vars[k] = decode_cf_variable(
  new_vars[k] = decode_cf_variable(


['r10i1p1f1', 'r11i1p1f1', 'r1i1p1f1', 'r2i1p1f1', 'r3i1p1f1', 'r4i1p1f1', 'r5i1p1f1', 'r6i1p1f1', 'r7i1p1f1', 'r8i1p1f1', 'r9i1p1f1']
11
Reuse existing file: bilinear_37x49_35x60.nc


CESM2-FV2 (2/24)


  new_vars[k] = decode_cf_variable(
  new_vars[k] = decode_cf_variable(
  new_vars[k] = decode_cf_variable(


['r1i1p1f1', 'r2i1p1f1', 'r3i1p1f1']
3
Reuse existing file: bilinear_18x25_35x60.nc


CESM2-WACCM (3/24)


  new_vars[k] = decode_cf_variable(


['r1i1p1f1', 'r2i1p1f1', 'r3i1p1f1']
3
Reuse existing file: bilinear_37x49_35x60.nc


CESM2-WACCM-FV2 (4/24)


  new_vars[k] = decode_cf_variable(
  new_vars[k] = decode_cf_variable(
  new_vars[k] = decode_cf_variable(


['r1i1p1f1', 'r2i1p1f1', 'r3i1p1f1']
3
Reuse existing file: bilinear_18x25_35x60.nc


CNRM-CM6-1 (5/24)
['r10i1p1f2', 'r11i1p1f2', 'r12i1p1f2', 'r13i1p1f2', 'r14i1p1f2', 'r15i1p1f2', 'r16i1p1f2', 'r17i1p1f2', 'r18i1p1f2', 'r19i1p1f2', 'r1i1p1f2', 'r20i1p1f2', 'r21i1p1f2', 'r22i1p1f2', 'r24i1p1f2', 'r25i1p1f2', 'r26i1p1f2', 'r27i1p1f2', 'r28i1p1f2', 'r29i1p1f2', 'r2i1p1f2', 'r30i1p1f2', 'r3i1p1f2', 'r4i1p1f2', 'r5i1p1f2', 'r6i1p1f2', 'r7i1p1f2', 'r8i1p1f2', 'r9i1p1f2']
29
Reuse existing file: bilinear_25x42_35x60.nc


CNRM-CM6-1-HR (6/24)
['r1i1p1f2']
1
Reuse existing file: bilinear_70x121_35x60.nc


CNRM-ESM2-1 (7/24)
['r10i1p1f2', 'r11i1p1f2', 'r1i1p1f2', 'r2i1p1f2', 'r3i1p1f2', 'r4i1p1f2', 'r5i1p1f2', 'r7i1p1f2', 'r8i1p1f2', 'r9i1p1f2']
10
Reuse existing file: bilinear_25x42_35x60.nc


CanESM5 (8/24)
['r10i1p1f1', 'r10i1p2f1', 'r11i1p1f1', 'r11i1p2f1', 'r12i1p1f1', 'r12i1p2f1', 'r13i1p1f1', 'r13i1p2f1', 'r14i1p1f1', 'r14i1p2f1', 'r15i1p1f1', 'r15i1p2f1', 'r16i1p1f1', 'r16i1p2f1', 'r1

In [12]:
# Debugging aid: display the archive directory built from the current values of
# institute, model, realization, table, var and grid. These names are not defined in
# this cell; it relies on whatever a previously executed cell left in the kernel namespace.
'/bdd/CMIP6/CMIP/'+institute+'/'+model+'/historical/'+realization+'/'+table+'/'+var+'/'+grid+'/'

'/bdd/CMIP6/CMIP/NCAR/CESM2/historical/r9i1p1f1/Amon/pr/gn/'