# IPCC/AR6/WG1/Chapter 8 
## Stéphane Sénési

## Compute and/or plot ensemble statistics of relative changes 
- for mean and/or inter-annual standard deviation of basin-averaged variables, 
- over a list of basins and 
- for a time_slicing of a projection period, and 
- for a series of scenarios

### Results are json files, one per scenario (in a directory indicated below by variable 'outdir'), each hosting a dictionary organized as
- `changes[variable][time_stat][basin][ensemble_stat][slice] = stat_value`

### and named
- `changes_allvars_<scenario>_<data_versions_tag><version>.json`

### and a figure named e.g.
- `rate_of_change_per_basin_vs_<ref_period>_<data_versions_tag><plot_version>.png` (with a symbolic link named `<figure_name><version>.png`)

## Default settings (some may be overridden by Papermill - this would show in the next cell of the execution output notebook)

In [11]:
# When True, the test cell below overrides a number of these settings with a
# reduced configuration (short period, one basin, one scenario, two models)
# and disables the plot phase
do_test               = True

# This script has two phases, which can be activated separately
do_compute            = True
do_plot               = True

# Version number will be a suffix for data and figure filenames. Use e.g. "_V1" for legibility
# - 'version' is the suffix of the json files, of the metadata file and of the figure symbolic link
# - 'plot_version' is the suffix of the figure file itself
# - 'figure_name' is the base name of the metadata file and of the figure symbolic link
version               = ""
plot_version          = ""
figure_name           = "Fig8-27"

# Model data versions stuff
#############################

# Directory and tag used for reading the dictionary of data versions
data_versions_dir    = "/home/ssenesi/CAMMAC/select_data_versions"
# All models listed through next parameters as providing data for a scenario should be included :
data_versions_tag    = "20200918"
excluded_models      = [] 
included_models      = None       # Can be a list that limits models used

# Compute parameters
##########################

# Basins for changes computation. Value 'land' is also recognized
compute_basins      = [ "Amazon" , "Lena", "Yangtze" ,"Mississippi", "Danube", "Niger" ]
#compute_basins        = [ "Amazon" , "Lena", "Yangtze"  ]

# List of triplets [variable, table, time statistic] of interest
variables            = [ [ "mrro","Lmon","mean" ], [ "mrro","Lmon","std" ] ]

# List of ensemble statistics computed for each variable 
stats_list          = [ "median", "mean","nq5","nq95","nq25","nq75","ens"]

# Computing changes implies defining a reference period
ref_experiment      = "historical"
ref_period          = "1850-1900" 

# Define time slices for projection, for a list of projection experiments. 
# May include years belonging to ref_experiment duration, but scenario's begin 
# must match ref_experiment's end. 
# Slices begin at start_year, start_year+step, ... (up to last_year included)
# and each one spans periods_length years
scenarios          = [ "ssp585", "ssp245","ssp126" ]
periods_length     = 20
start_year         = 1901
last_year          = 2081  # i.e. last period's begin
step               = 10  # Not necessarily equal to periods_length !
#
# The directory holding output (json) files and figures
outdir             = "./figures"

# Additional parameters, for plot
###################################

# The plot script is tuned for two variables only
plot_variables      = [ ["mrro","Lmon","mean"], ["mrro","Lmon","std"] ]

# Labels used when building the plot title and axis titles
plot_variable_label = "runoff"
plot_name1          = "mean"
plot_name2          = "variability"
# Basins to plot. Must be a sublist of compute_basins
plot_basins         = ["Amazon","Yangtze", "Lena"]
#plot_basins=["Mississippi", "Danube", "Niger"]

# Curves to plot, in right order. Must be a sublist of 'stats_list' 
plot_stats          = ["nq5","mean","nq95"]
plot_stats_label    = "ensemble mean, 5 and 95 percentiles"

# Parameters for periods to plot. Must be consistent with periods above. Can be a subset
# (the length of plotted periods is 'periods_length', as for the compute phase)
plot_start_year     = 1901
plot_last_year      = 2081
plot_step           = 10


# Additional keyword arguments passed to each call of stats_of_basins_changes
ch=dict(compute=True,house_keeping=False)

# Stable parameters
#######################

# We use CTRIP-V2 data for basins. 
# The only constraint on basins data is to be provided as a NetCDF file with a single 
# field having a distinct integer value for each basin. One must also provide here below 
# some mapping of integers to basin names for the basins of interest 
basins_file        = "/home/ssenesi/CAMMAC/data/basins/num_bas_ctrip.nc"
# See colocated file rivnum05_new2.txt for a full table of basin numbers
# Entry "land" is necessary if wishing to compute integration over land 
basins_key         = {"land": -999, "Yangtze":11 , "Lena":8, "Amazon":1, "Mississippi":3 , 
                      "Danube":29, "Niger":9}
 
# Location of libraries
# Climaf version >= 1.2.13 (see https://climaf.readthedocs.io)
climaf_lib         = "/home/ssenesi/climaf_installs/climaf_running" 
# AR6/WGI/chapter8 CliMAF-based package
CAMMAC                = "/home/ssenesi/CAMMAC"




In [12]:
# Reduced configuration for quick tests : compute phase only, on a short
# period, for a single basin, a single scenario and two models
if do_test:
    do_plot = False
    version = "_Vtest"
    # Time slicing
    periods_length = 3
    start_year = 2016
    last_year = 2020
    # Data selection
    compute_basins = ["Amazon"]
    variables = [["mrro", "Lmon", "mean"], ["mrro", "Lmon", "std"]]
    scenarios = ["ssp585"]
    stats_list = ["median"]
    included_models = ["CNRM-CM6-1", "IPSL-CM6A-LR"]
    # Keyword arguments for stats_of_basins_changes
    ch = {"compute": True, "house_keeping": False}


In [13]:
# These two commands have no effects when run outside a notebook
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

In [14]:
import json
import sys
import os, os.path
import subprocess

In [15]:
# Put the selected CliMAF install first on the path, so that it takes
# precedence over any other install
sys.path=[climaf_lib] + sys.path
from climaf.api import *
# CliMAF cache and driver settings (see CliMAF documentation for their meaning)
climaf.cache.stamping=False
climaf.driver.dig_hard_into_cache=False

In [16]:
# Make the CAMMAC package importable, then import the helpers used in next cells
sys.path.append(CAMMAC) 
from CAMMAClib.ancillary   import prettier_label, feed_dic
from CAMMAClib.mips_et_al  import read_versions_dictionnary, TSU_metadata
from CAMMAClib.changes     import stats_of_basins_changes

In [17]:
# Define derived variables
derive('CMIP6', 'P-E','minus','pr','evspsbl')
derive('CMIP6_extent', 'P-E','minus','pr','evspsbl')

# Fix sign issue with some models for evspsbl
calias('CMIP6','evspsbl',scale=-1,conditions={"model":["CAMS-CSM1-0","EC-Earth3","EC-Earth3-Veg"]})
calias('CMIP6_extent','evspsbl',scale=-1,conditions={"model":["CAMS-CSM1-0","EC-Earth3","EC-Earth3-Veg"]})

In [18]:
def init_slices(start_year,last_year,periods_length,step):
    """
    Return the list of time slices, each one as a string "begin-end".

    Slices begin at start_year, start_year+step, ... as long as the begin
    year does not exceed last_year ; each slice spans periods_length years,
    i.e. ends at begin+periods_length-1.

    The list is empty if start_year is greater than last_year.

    Raises ValueError if slices are to be generated with a non-positive
    step (this would otherwise loop forever).
    """
    if step <= 0 and start_year <= last_year :
        raise ValueError("step must be a positive number of years, got %s"%step)
    slices=[]
    current=start_year
    while current <= last_year :
        slices.append("%s-%s"%(current,current+periods_length-1))
        current+=step
    return slices

In [61]:
if do_compute :

    # Read dictionnary of data versions 
    data_versions=read_versions_dictionnary(data_versions_tag,data_versions_dir)
    metadata=""

    # Init time periods
    slices=init_slices(start_year,last_year,periods_length,step)

    basins_data={"basins":compute_basins, "basins_file":basins_file,"basins_key":basins_key}
    basins_data_globe={"basins":["globe"], "basins_file":"","basins_key":{}}
    #
    import os.path
    if not os.path.exists(outdir):
        os.makedirs(outdir)
    #
    model_changes=dict()
    metadata=""
    for scenario in scenarios :
        stats_all_vars=dict()
        tas_models=set()
        for variable,table,time_stat in variables :
            #print scenario, variable, table, time_stat
            changes,models=stats_of_basins_changes(model_changes, ref_experiment, scenario, ref_period,variable, table, time_stat,
                                               data_versions,slices,stats_list,basins_data, included_models=included_models,
                                               excluded_models=excluded_models, must_have_vars=[("tas","Amon")],**ch)
            feed_dic(stats_all_vars,changes,variable,time_stat)
            metadata+=TSU_metadata([scenario,ref_experiment],models,variable,table,data_versions)
            tas_models=tas_models.union(models)
            print "tas_models=",tas_models
        # Next for tas. We use the union of per-variable models list 
        tas_models = [ model for model,real in tas_models ]
        tas_changes,models=stats_of_basins_changes(model_changes, ref_experiment, scenario, ref_period,"tas","Amon", "mean",
                                    data_versions,slices,["mean","ens"],basins_data_globe, included_models=tas_models,
                                    relative=False, **ch)
        feed_dic(stats_all_vars,tas_changes,"tas","mean")
        metadata+=TSU_metadata([scenario,ref_experiment],models,"tas","Amon",data_versions)
        # Write all variables results
        with open (outdir+"/changes_allvars_%s_%s%s.json"%(scenario,data_versions_tag,version),"w") as f :
            json.dump(stats_all_vars,f,separators=(',', ': '),indent=3)
    #
    with open("%s/%s%s_md"%(outdir,figure_name,version),"w") as f: f.write(metadata)
            

ssp585 mrro Lmon mean model CNRM-CM6-1-HR does not fit set(['ssp585', 'historical']) set(['ssp585', 'historical']) ['CNRM-CM6-1', 'IPSL-CM6A-LR'] []
model MPI-ESM1-2-HR does not fit set(['ssp585', 'historical']) set(['ssp585', 'historical']) ['CNRM-CM6-1', 'IPSL-CM6A-LR'] []
model UKESM1-0-LL does not fit set(['ssp585', 'historical']) set(['ssp585', 'historical']) ['CNRM-CM6-1', 'IPSL-CM6A-LR'] []
model HadGEM3-GC31-LL does not fit set(['ssp585', 'historical']) set(['ssp585', 'historical']) ['CNRM-CM6-1', 'IPSL-CM6A-LR'] []
model EC-Earth3-Veg does not fit set(['ssp585', 'historical']) set(['ssp585', 'historical']) ['CNRM-CM6-1', 'IPSL-CM6A-LR'] []
model MPI-ESM1-2-LR does not fit set(['historical']) set(['ssp585', 'historical']) ['CNRM-CM6-1', 'IPSL-CM6A-LR'] []
model CMCC-CM2-HR4 does not fit set(['historical']) set(['ssp585', 'historical']) ['CNRM-CM6-1', 'IPSL-CM6A-LR'] []
model MRI-ESM2-0 does not fit set(['ssp585', 'historical']) set(['ssp585', 'historical']) ['CNRM-CM6-1', 'IPSL

 CNRM-CM6-1  IPSL-CM6A-LR
tas_models= set([('IPSL-CM6A-LR', u'r1i1p1f1'), ('CNRM-CM6-1', u'r1i1p1f2')])
ssp585 tas Amon mean model CNRM-CM6-1-HR does not fit set(['ssp585', 'historical']) set(['ssp585', 'historical']) ['IPSL-CM6A-LR', 'CNRM-CM6-1'] []
model MPI-ESM1-2-HR does not fit set(['ssp585', 'historical']) set(['ssp585', 'historical']) ['IPSL-CM6A-LR', 'CNRM-CM6-1'] []
model UKESM1-0-LL does not fit set(['ssp585', 'historical']) set(['ssp585', 'historical']) ['IPSL-CM6A-LR', 'CNRM-CM6-1'] []
model HadGEM3-GC31-LL does not fit set(['ssp585', 'historical']) set(['ssp585', 'historical']) ['IPSL-CM6A-LR', 'CNRM-CM6-1'] []
model NorCPM1 does not fit set(['historical']) set(['ssp585', 'historical']) ['IPSL-CM6A-LR', 'CNRM-CM6-1'] []
model EC-Earth3-Veg does not fit set(['ssp585', 'historical']) set(['ssp585', 'historical']) ['IPSL-CM6A-LR', 'CNRM-CM6-1'] []
model MPI-ESM-1-2-HAM does not fit set(['historical']) set(['ssp585', 'historical']) ['IPSL-CM6A-LR', 'CNRM-CM6-1'] []
model MPI-

# Create a NetCDF file for Ncl plot script

In [None]:
if do_plot :

    import Nio

    # Init plot time periods
    plot_slices=init_slices(plot_start_year,plot_last_year,periods_length,plot_step)

    # Load, for each scenario, the statistics stored as json by the compute phase
    stats=dict()
    for scenario in scenarios :
        filename="%s/changes_allvars_%s_%s%s.json"%(outdir,scenario,data_versions_tag,version)
        try :
            with open (filename,"r") as f :
                stats[scenario]=json.load(f)
        except (IOError, ValueError):
            # IOError : file is missing or unreadable ; ValueError : content is not valid json.
            # Other exceptions (e.g. KeyboardInterrupt) are not masked
            raise ValueError("No cached data for scenario %s. Try setting 'do_compute=True'\n%s"%(scenario,filename))
    #
    fn="change_rate_basins_data.nc"
    # The file is opened below in "c" mode, so remove any file left by a
    # previous run (plain Python rather than a notebook-only shell escape)
    if os.path.exists(fn) :
        os.remove(fn)
    f=Nio.open_file(fn,"c")
    try :
        f.create_dimension('ssp'   ,len(scenarios))
        f.create_dimension('stat'  ,len(plot_stats))
        f.create_dimension('basin' ,len(plot_basins))
        f.create_dimension('period',len(plot_slices))
        #
        # Ensemble mean of the 'globe' tas change, per scenario and period
        f.create_variable('tas','d',('ssp','period'))
        tas_mean=[[stats[scenario]["tas"]["mean"]["globe"]["mean"][p]
                   for p in plot_slices ]
                  for scenario in scenarios ]
        f.variables['tas'][:] = tas_mean
        #
        # One field per plotted variable and time statistic, e.g. 'mrro_mean'
        for variable,table,time_stat in plot_variables :
            var_stat=variable+"_"+time_stat
            f.create_variable(var_stat,'d',('ssp','basin','stat','period'))
            f.variables[var_stat][:] = [[[[ stats[scenario][variable][time_stat][basin][stat][p]
                                           for p in plot_slices]
                                          for stat in plot_stats]
                                         for basin in plot_basins ]
                                        for scenario in scenarios ]
            # TODO : also store the number of models for each var+stat and scenario
            # (presumably derivable from the 'ens' statistic - to be confirmed)
    finally :
        # Close the file even if some statistic is missing, so that no
        # half-open file handle is left in the notebook session
        f.close()
    

# Launch Ncl script for plot

### A number of parameters for the plot are provided through Ncl command-line arguments, and can be fine-tuned here. 
### However some tuning of the Ncl script code must occur regarding ymin/ymax of each panel (depending on runoff range over each basin)

In [None]:
if do_plot :
    # Command below is needed on Ciclad due to (uncomplete ?) Nio install in Jerome's conda
    #  env which has an adverse impact on launched command environment for Ncl execution
    if "NCARG_NCARG" in os.environ :
        os.environ.pop("NCARG_NCARG")

    def ncl_strings_tab(it) :
        # Create a tab of strings from it, in Ncl syntax ; e.g. : (/"aa","bb"/)
        tab="(/"
        for val in it[0:-1] : tab+='"%s", '%val
        tab+='"%s"/)'%it[-1]
        return tab
    #
    figfile="rate_of_change_per_basin_vs_%s_%s%s"%(ref_period,data_versions_tag,plot_version)
    if not os.path.exists(outdir):
        os.makedirs(outdir)
    #
    ncl_basins=ncl_strings_tab(plot_basins)
    ncl_vars=ncl_strings_tab([ "%s_%s"%(var,stat) for var,table,stat in variables ])
    pl=[ prettier_label[e] for e in scenarios ]
    ncl_experiment_labels=ncl_strings_tab(pl)
    #
    nb_models=29
    end=last_year+periods_length-1
    ncl_script=CAMMAC+"/notebooks/change_rate_basins.ncl"

    # Plot figure
    command="ncl -Q %s"%ncl_script +\
             " ' input_file = \"%s\"'"       %fn +\
             " ' figfile    = \"%s/%s\"'"       %(outdir,figfile) +\
             " ' names = (/\"%s\",\"%s\"/)'" %(plot_name1,plot_name2) +\
             " ' title = \"Rate of change in basin-scale %s %s and %s\"'"%(plot_variable_label,plot_name1,plot_name2) +\
             " ' xtitle = \"Warming above %s, from %s to %s\"'"%(ref_period,start_year,end) +\
             " ' ytitle = \"Change in basin-averaged %s and %s of %s, vs %s "%(plot_name1,plot_name2,plot_variable_label,ref_period)+\
             "(%%) ~Z75~~C~(%s models %s)\"'"%(nb_models,plot_stats_label) +\
             " ' vars = %s'"%ncl_vars+\
             " ' basins = %s'"%ncl_basins+\
             " ' experiments_labels = %s'"%(ncl_experiment_labels)+\
             " ' xmin = 0.0'"+\
             " ' xmax = 5.05'"
    print "command=",command
    out=subprocess.check_output(command,shell=True)
    if "OK" not in out[-3:] :
        print out
    else :
        os.system("cd %s ; ln -sf %s.png %s%s.png"%(outdir,figfile,figure_name,version))


# For Zhang figure scale
#'yminmax=(/(/(/-10,10/),(/-30.,30./)/),(/(/-10,5/),(/-20.,30./)/),(/(/0,35/),(/-10.,60./)/)  /)'
#'xmax=3.05'\

