# Import modules

In [4]:
# Computational modules 
%matplotlib inline
import xarray as xr
import glob
import os
import numpy as np
import netCDF4
from netCDF4 import Dataset
import pandas as pd
import re
from array import array
from pylab import *
#import geopandas
from eofs.xarray import Eof
from eofs.multivariate.standard import MultivariateEof
import random

# Plotting modules 
import matplotlib.pyplot as plt
#from mpl_toolkits.basemap import Basemap
import pandas.plotting
import matplotlib.ticker as ticker
import seaborn as sns
import cartopy.crs as ccrs
import cartopy.feature as cfeature
from matplotlib.colors import BoundaryNorm
from cartopy.util import add_cyclic_point

# Scikit-learn
from sklearn import linear_model
from sklearn.linear_model import LassoCV, LassoLarsCV, LassoLarsIC
from sklearn.linear_model import Lasso
from sklearn.metrics import r2_score
from sklearn import preprocessing
from sklearn import tree
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix
from sklearn import metrics
from sklearn.neural_network import MLPRegressor
from scipy.optimize import minimize
from scipy.optimize import dual_annealing
from sklearn.decomposition import PCA

# Settings

### Variables

In [5]:
# Names of the fields analysed in this notebook; they are used as dictionary
# keys and inside file names in the cells below.
variables = ['tas', 'pr', 'psl', 'SW', 'LW']
# Radiative flux variables read from the model files ('SW' is taken from
# 'rsut' and 'LW' from 'rlut' further down).
var_ceres = ['rsdt','rsut', 'rlut']
# Same length as `variables` — presumably the number of EOF modes retained
# for each field, in the same order; TODO confirm (not used in the cells visible here).
truncations = [18, 18, 8, 28, 22]
# NOTE(review): these look like plot labels; they are not used in the cells visible here.
TITLE = 'Multi-variate'
ylabel = '$E_{tot}$'

### Paths

In [6]:
# NOTE(review): all paths are absolute and machine-specific; they must be
# adapted before running the notebook elsewhere.
# Not used in the cells visible here.
path_official='/data/scratch/globc/peatier/CMIP6/CNRM-CM6-1/CFMIP/amip/'
# Root of the PPE simulation output (ENSEMBLE1 / ENSEMBLE2 sub-directories are appended below).
path_PPE='/data/scratch/globc/peatier/PPE/CNRM-CM6-1_PPE/'
# Root for the notebook's auxiliary files: 'npy/' weights are read from it,
# 'pkl/' solvers and 'nc/' means are written to it.
path_files='/data/home/globc/peatier/PPE/CNRMppe_error_decomposition/files/'
# Not used in the cells visible here.
path_file_npy = '/data/home/globc/peatier/PPE/CNRMppe_save/PPE/ENSEMBLE2/files/npy/'

### List of members

In [7]:
# Identifiers of the PPE members to load. Each identifier is inserted into the
# run directory name ('..._r1i1<member>f2/') when the members are read, and the
# order of this list fixes the order of the members in the stacked arrays.
# NOTE: the list is not strictly sorted (e.g. 'p413' comes after 'p416').
nb_p_list = ['p311', 'p312', 'p314', 'p316',
                    'p317', 'p319', 'p320', 'p321', 'p322', 'p324', 'p325', 'p326', 
                    'p329', 'p330', 'p331', 'p332', 'p335', 'p336', 'p337' ,'p338', 
                    'p340', 'p341', 'p343', 'p344', 'p346', 'p347', 'p348', 'p349', 
                    'p350', 'p353', 'p355', 'p357', 'p359', 'p360', 
                    'p361', 'p363', 'p365', 'p367', 'p368', 'p369', 
                    'p372', 'p373', 'p374', 'p375', 'p376', 'p378', 'p381', 'p382', 
                    'p384', 'p386', 'p388', 'p389', 'p391', 'p392', 'p393', 
                    'p394', 'p395', 'p396', 'p398', 'p399', 'p400', 'p404', 
                    'p406', 'p407', 'p409', 'p410', 'p411', 'p412',
                    'p414','p416',
                    'p413','p419','p424','p426','p428','p421','p423',
                    'p425','p427','p429','p430','p436','p438','p431','p433',
                    'p442','p446','p443','p445','p447',
                    'p452','p454','p456','p458','p457','p459',
                    'p460','p465','p467','p469',
                    'p470','p471']

# Display the number of members as the cell output.
len(nb_p_list)

102

# Functions

In [8]:
def get_3D_tas_xarr(path, filename, variables):
    """
    Return the time-averaged 'tas' field read from netCDF file(s).

    The file(s) matching ``path + filename`` are opened as one dataset,
    restricted to ``variables`` and averaged over the whole 'time'
    dimension (attributes are kept).

    Parameters
    ----------
    path : str
        Directory prefix; concatenated as-is with ``filename``.
    filename : str
        File name or glob pattern.
    variables : list of str
        Variables selected before averaging; must include 'tas'.

    Returns
    -------
    The 'tas' variable of the time-averaged dataset.
    """
    dataset = xr.open_mfdataset(path + filename, combine='by_coords')
    time_mean = dataset[variables].mean('time', keep_attrs=True)
    return time_mean['tas']

In [9]:
def get_3D_pr_xarr(path, filename, variables):
    """
    Return the time-averaged 'pr' field, multiplied by 86400.

    The file(s) matching ``path + filename`` are opened as one dataset,
    restricted to ``variables`` and averaged over the whole 'time'
    dimension (attributes are kept). The 'pr' variable of the result is
    then scaled by 86400 (the number of seconds in a day — presumably a
    per-second to per-day conversion; confirm against the file's units).

    Parameters
    ----------
    path : str
        Directory prefix; concatenated as-is with ``filename``.
    filename : str
        File name or glob pattern.
    variables : list of str
        Variables selected before averaging; must include 'pr'.

    Returns
    -------
    The scaled, time-averaged 'pr' variable.
    """
    dataset = xr.open_mfdataset(path + filename, combine='by_coords')
    time_mean = dataset[variables].mean('time', keep_attrs=True)
    scaled_pr = time_mean['pr'] * 86400
    return scaled_pr

In [10]:
def get_3D_psl_xarr(path, filename, variables):
    """
    Return the time-averaged 'psl' field read from netCDF file(s).

    The file(s) matching ``path + filename`` are opened as one dataset,
    restricted to ``variables`` and averaged over the whole 'time'
    dimension (attributes are kept).

    Parameters
    ----------
    path : str
        Directory prefix; concatenated as-is with ``filename``.
    filename : str
        File name or glob pattern.
    variables : list of str
        Variables selected before averaging; must include 'psl'.

    Returns
    -------
    The 'psl' variable of the time-averaged dataset.
    """
    dataset = xr.open_mfdataset(path + filename, combine='by_coords')
    time_mean = dataset[variables].mean('time', keep_attrs=True)
    return time_mean['psl']

In [11]:
def get_3D_LW_xarr(path, filename, variables):
    """
    Return the time-averaged 'rlut' field (used as LW in this notebook).

    The file(s) matching ``path + filename`` are opened as one dataset,
    restricted to ``variables`` and averaged over the whole 'time'
    dimension (attributes are kept).

    Parameters
    ----------
    path : str
        Directory prefix; concatenated as-is with ``filename``.
    filename : str
        File name or glob pattern.
    variables : list of str
        Variables selected before averaging; must include 'rlut'.

    Returns
    -------
    The 'rlut' variable of the time-averaged dataset.
    """
    dataset = xr.open_mfdataset(path + filename, combine='by_coords')
    time_mean = dataset[variables].mean('time', keep_attrs=True)
    return time_mean['rlut']

In [12]:
def get_3D_SW_xarr(path, filename, variables):
    """
    Return the time-averaged 'rsut' field (used as SW in this notebook).

    The file(s) matching ``path + filename`` are opened as one dataset,
    restricted to ``variables`` and averaged over the whole 'time'
    dimension (attributes are kept). Only 'rsut' is returned; 'rsdt' is
    not subtracted from it.

    Parameters
    ----------
    path : str
        Directory prefix; concatenated as-is with ``filename``.
    filename : str
        File name or glob pattern.
    variables : list of str
        Variables selected before averaging; must include 'rsut'.

    Returns
    -------
    The 'rsut' variable of the time-averaged dataset.
    """
    dataset = xr.open_mfdataset(path + filename, combine='by_coords')
    time_mean = dataset[variables].mean('time', keep_attrs=True)
    return time_mean['rsut']

In [13]:
def load_monthly_clim(path, filename, variables) :
    """
    Build a per-calendar-month mean table from netCDF file(s).

    The file(s) matching ``path + filename`` are opened as one dataset,
    the requested variable(s) are converted to a pandas DataFrame, and the
    rows are averaged for every (month, lat, lon) combination across all
    years present in the data.

    Parameters
    ----------
    path : str
        Directory prefix; concatenated as-is with ``filename``.
    filename : str
        File name or glob pattern.
    variables : str or list of str
        Variable name(s) to select from the dataset.

    Returns
    -------
    pandas.DataFrame
        Indexed by ('month', 'lat', 'lon'); the helper 'year' column is
        removed before returning.
    """
    dataset = xr.open_mfdataset(path + filename, combine='by_coords')
    table = dataset[variables].to_dataframe()

    # Turn the index levels into ordinary columns so they can be grouped on.
    flat = table.reset_index(level=['time', 'lat', 'lon'])

    # Calendar year and month of every row, derived from its time stamp.
    stamps = pd.DatetimeIndex(flat['time'])
    flat['year'] = stamps.year
    flat['month'] = stamps.month

    # Climatological annual cycle: mean over all years for each month and grid point.
    monthly_mean = flat.groupby(['month', 'lat', 'lon']).mean()
    return monthly_mean.drop(columns='year')

# Get data 

In [14]:
# Load the pre-computed weight arrays of every analysed variable.
# The arrays are stored per variable (dicts keyed by variable name) so that
# loading one variable does not discard the arrays of the previous one.
W_eof_2D_by_var = {}
W_eof_3D_by_var = {}
W_rmse_2D_by_var = {}
for var in variables :
    W_eof_2D_by_var[var] = np.load(path_files+'npy/W_eof_2D_'+str(var)+'.npy')
    W_eof_3D_by_var[var] = np.load(path_files+'npy/W_eof_3D_'+str(var)+'.npy')
    W_rmse_2D_by_var[var] = np.load(path_files+'npy/W_rmse_2D_'+str(var)+'.npy')

# Backward compatibility: the plain names keep the binding the original loop
# left behind, i.e. the arrays of the LAST entry of `variables`.
if variables :
    W_eof_2D = W_eof_2D_by_var[variables[-1]]
    W_eof_3D = W_eof_3D_by_var[variables[-1]]
    W_rmse_2D = W_rmse_2D_by_var[variables[-1]]

## Reference simulations p1

In [15]:
# Reference simulation (run r1i1p1f2 of ENSEMBLE1)
path = path_PPE+'ENSEMBLE1/CNRM-CM6-1_amip_PPE/CNRM-CM6-1_amip_r1i1p1f2/'
filename = '*_CNRM-CM6-1_amip_*.nc'

p1_amip = {}
diff_p1 = {}
X_p1_w = {}

### --- tas and psl: monthly climatology, then mean over the months per grid point
for var in ('tas', 'psl'):
    print(var)
    monthly_clim = load_monthly_clim(path, filename, var)
    p1_amip[var] = monthly_clim.groupby(['lat','lon']).mean().to_xarray()

### --- CERES data
# All radiative fluxes are loaded once; SW is taken from 'rsut' only and
# LW from 'rlut'. The derived columns are also added to df_ceres.
df_ceres = load_monthly_clim(path, filename, var_ceres)

for var, flux in (('SW', 'rsut'), ('LW', 'rlut')):
    df_ceres[var] = df_ceres[flux]
    p1_amip[var] = df_ceres[var].groupby(['lat','lon']).mean().to_xarray()
    print(var)

### --- Precipitation pr
# Stored here as read from the files: unlike get_3D_pr_xarr, no 86400
# factor is applied in this cell.
var = 'pr'
print(var)
monthly_clim = load_monthly_clim(path, filename, var)
p1_amip[var] = monthly_clim.groupby(['lat','lon']).mean().to_xarray()


tas
psl
SW
LW
pr


In [16]:
# Display the reference-simulation SW field built in the previous cell.
p1_amip['SW']

##  PPE members

In [17]:
# PPE amip simulations
# For every member listed in nb_p_list, load the time-mean field of each
# analysed variable. The result dicts are keyed by member identifier.
filename='*_amip_*.nc'
N=len(nb_p_list)
amip_tas = {}
amip_pr = {}
amip_psl = {}
amip_SW = {}
amip_LW = {}

# Iterate over the members directly; the identifiers are already strings,
# so they are used as dict keys and in the directory name without conversion.
for nb_p in nb_p_list:
    print(nb_p)
    path = path_PPE+'ENSEMBLE2/CNRM-CM6-1_amip_LHS500_PPE/CNRM-CM6-1_amip_r1i1'+nb_p+'f2/'
    amip_tas[nb_p] = get_3D_tas_xarr(path, filename, ['tas'])
    amip_pr[nb_p] = get_3D_pr_xarr(path, filename, ['pr'])
    amip_psl[nb_p] = get_3D_psl_xarr(path, filename, ['psl'])
    amip_SW[nb_p] = get_3D_SW_xarr(path, filename, var_ceres)
    amip_LW[nb_p] = get_3D_LW_xarr(path, filename, var_ceres)

p311
p312
p314
p316
p317
p319
p320
p321
p322
p324
p325
p326
p329
p330
p331
p332
p335
p336
p337
p338
p340
p341
p343
p344
p346
p347
p348
p349
p350
p353
p355
p357
p359
p360
p361
p363
p365
p367
p368
p369
p372
p373
p374
p375
p376
p378
p381
p382
p384
p386
p388
p389
p391
p392
p393
p394
p395
p396
p398
p399
p400
p404
p406
p407
p409
p410
p411
p412
p414
p416
p413
p419
p424
p426
p428
p421
p423
p425
p427
p429
p430
p436
p438
p431
p433
p442
p446
p443
p445
p447
p452
p454
p456
p458
p457
p459
p460
p465
p467
p469
p470
p471


In [18]:
n=len(nb_p_list)

# One list per variable, each of length n+1:
#   element 0      -> reference simulation p1
#   elements 1..n  -> PPE members, in nb_p_list order
amip_tas_list = [p1_amip['tas']['tas']] + [amip_tas[nb_p] for nb_p in nb_p_list]

# The PPE members' 'pr' was multiplied by 86400 in get_3D_pr_xarr, whereas the
# reference 'pr' was stored without that factor. Apply the same factor here so
# that all rows of the stacked precipitation array share the same units.
amip_pr_list = [p1_amip['pr']['pr']*86400] + [amip_pr[nb_p] for nb_p in nb_p_list]

amip_psl_list = [p1_amip['psl']['psl']] + [amip_psl[nb_p] for nb_p in nb_p_list]

# p1_amip['SW'] / p1_amip['LW'] are already single fields (no variable lookup needed).
amip_SW_list = [p1_amip['SW']] + [amip_SW[nb_p] for nb_p in nb_p_list]
amip_LW_list = [p1_amip['LW']] + [amip_LW[nb_p] for nb_p in nb_p_list]

In [19]:
# Stack, for every variable, the reference field and the member fields along
# a 'time' dimension (here this axis indexes simulations, not dates).
fields_by_variable = [
    ('tas', amip_tas_list),
    ('pr', amip_pr_list),
    ('psl', amip_psl_list),
    ('SW', amip_SW_list),
    ('LW', amip_LW_list),
]
matrix = {}
for field_name, field_list in fields_by_variable:
    matrix[field_name] = xr.combine_nested(field_list, concat_dim=['time'])

In [20]:
## --PPE simulations
# For every variable: ensemble mean, anomalies with respect to that mean, and
# weighted anomalies.
mean = {}
anom = {}
anom_w = {}
for var in variables :
    print(var)
    # Mean over the first axis, i.e. over the stacked simulations.
    mean[var] = matrix[var].mean(axis=0)
    anom[var] = matrix[var] - mean[var]
    # Use the weight file that belongs to THIS variable. The module-level
    # W_eof_3D is whatever the loading loop assigned last (the last entry of
    # `variables`), so it must not be applied to every variable.
    weights_3D = np.load(path_files+'npy/W_eof_3D_'+str(var)+'.npy')
    anom_w[var] = (anom[var]*weights_3D)

tas
pr
psl
SW
LW


In [21]:
# `var` is the variable left over from the loop in the previous cell, i.e. the
# last entry of `variables`.
anom_w[var]

Unnamed: 0,Array,Chunk
Bytes,27.00 MB,262.14 kB
Shape,"(103, 128, 256)","(1, 128, 256)"
Count,961 Tasks,103 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 27.00 MB 262.14 kB Shape (103, 128, 256) (1, 128, 256) Count 961 Tasks 103 Chunks Type float64 numpy.ndarray",256  128  103,

Unnamed: 0,Array,Chunk
Bytes,27.00 MB,262.14 kB
Shape,"(103, 128, 256)","(1, 128, 256)"
Count,961 Tasks,103 Chunks
Type,float64,numpy.ndarray


In [59]:
# Build one EOF solver per variable from the weighted anomalies.
# center=False: the ensemble mean was already subtracted when the anomalies
# were computed; weights=None: the weighting was applied beforehand.
solver = dict()
for var in variables:
    print(var)
    weighted_anomalies = anom_w[var]
    solver[var] = Eof(weighted_anomalies, center=False, weights=None)

tas
pr
psl
SW
LW


# Save the EOF Solver

In [60]:
import pickle

# Write every variable's EOF solver to '<path_files>pkl/solver_<var>.pkl'.
path = path_files+'pkl/'
for var in variables :
    print(var)
    # The `with` block closes the file on exit, so no explicit close() is needed.
    with open(path+'solver_'+var+'.pkl', 'wb') as f:
        pickle.dump(solver[var], f)

tas
pr
psl
SW
LW


In [61]:
## Save the MEAN
# One netCDF file per variable, written under '<path_files>nc/'.
path = path_files+'nc/'
for var in variables:
    filename = 'CNRMppe_decomposition_mean_{}.nc'.format(var)
    mean[var].to_netcdf(path + filename)

# EOF solver can be loaded from here

In [None]:
import pickle

# Reload the EOF solvers written to '<path_files>pkl/solver_<var>.pkl'.
# NOTE(security): pickle.load can execute arbitrary code; only load files
# that this notebook (or another trusted source) produced.
path = path_files+'pkl/'
solver = {}
for var in variables :
    print(var)

    # Open the file holding the pickled solver; the `with` block guarantees
    # the handle is closed even if unpickling fails.
    with open(path+'solver_'+var+'.pkl', 'rb') as file:
        # Read the solver back from that file.
        solver[var] = pickle.load(file)