# Latin Hypercube Sampling

## Import modules

In [1]:
# Computational modules 
%matplotlib inline
import xarray as xr
import glob
import os
import numpy as np
import netCDF4
from netCDF4 import Dataset
import pandas as pd
import re
from array import array
from pylab import *
from smt.sampling_methods import LHS
from pyDOE import *

# Plotting modules 
import matplotlib.pyplot as plt
#from mpl_toolkits.basemap import Basemap
import pandas.plotting
import matplotlib.ticker as ticker
# scatter plot matrix des variables quantitatives
from pandas.plotting import scatter_matrix
import seaborn as sns; sns.set()

# Scikit-learn
from sklearn import linear_model
from sklearn.linear_model import LassoCV, LassoLarsCV, LassoLarsIC
from sklearn.linear_model import Lasso
from sklearn.metrics import r2_score
from sklearn import preprocessing
from sklearn import tree
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix
from sklearn import metrics
from sklearn.neural_network import MLPRegressor

## Import functions

In [2]:
import sys
sys.path.append('/data/home/globc/peatier/CNRMppe')
import Fonctions
from Fonctions import get_wavg_budget_df
from Fonctions import wavg 
from Fonctions import plotlines_Xdf
from Fonctions import plotlines_1df
from Fonctions import Deltas_Lambda
from Fonctions import get_3D_budget_xarr
from Fonctions import get_3D_xarr
from Fonctions import get_3D_SW_xarr
from Fonctions import get_3D_LW_xarr
#from Fonctions import pairplot

## Latin Hypercube Sampling

In [49]:
# Read the interval parameters calues from file 'param.csv' 
# Then arrange nicely the data in dataframe and change data type of the values
param = pd.read_csv("/data/home/globc/peatier/CNRMppe/PPE/files/csv/param.csv",sep=' ', header=None)
param_limit = param.set_index(0).transpose()
param_limit

Unnamed: 0,AKN,ALPHAT,ALD,ALMAVE,AGREF,AGRE1,AGRE2,RAUTEFR,RQLCR,RAUTEFS,...,VVN,VVX,ALFX,FNEBC,GCTURB,RLWINHF_ICE,RLWINHF_LIQ,RSWINHF_ICE,RSWINHF_LIQ,REFLCAPE
1,0.06,0.5,0.5,0,-0.5,0,0,0.0005,5e-05,0.0005,...,-1,-25,0.01,0,1e-05,0.5,0.5,0.5,0.5,0.2
2,0.28,3,3,30,-0.01,10,10,0.01,0.001,0.01,...,-5,-50,0.1,20,0.0001,1,1,1,1,10
3,0.126,1.13,1.18,10,-0.36,5.5,0,0.001,0.0002,0.0052,...,-2,-35,0.04,10,5e-05,0.9,0.9,0.71,0.71,2
4,linear,linear,linear,linear,linear,linear,linear,linear,linear,linear,...,linear,linear,linear,linear,linear,linear,linear,linear,linear,linear


In [50]:
# Load the parameter names of the ENSEMBLE 3
param_names = np.load('/data/home/globc/peatier/CNRMppe/PPE/files/npy/ENSEMBLE3_param_names.npy')

In [51]:
ENSEMBLE3_param_limit = param_limit[param_names]
ENSEMBLE3_param_limit

Unnamed: 0,ALMAVE,VVX,RSWINHF_ICE,FNEBC,RQLCR
1,0,-25,0.5,0,5e-05
2,30,-50,1,20,0.001
3,10,-35,0.71,10,0.0002
4,linear,linear,linear,linear,linear


In [52]:
# Set the limits of each parameter as input of the hypercube :

N=len(param_names)
xlimits=[[1,1]]*N
i=0
while i<N:
    name = param_names[i]
    xmin = ENSEMBLE3_param_limit[name].iloc[0]
    xmax = ENSEMBLE3_param_limit[name].iloc[1]
    limit = [xmin, xmax]
    xlimits[i] = limit
    i+=1
    
Xlim = np.array(xlimits)
Xlim

array([[ 0.0e+00,  3.0e+01],
       [-2.5e+01, -5.0e+01],
       [ 5.0e-01,  1.0e+00],
       [ 0.0e+00,  2.0e+01],
       [ 5.0e-05,  1.0e-03]])

In [53]:
# -------------- Executing the Latin Hypercube Sampling : 

sampling = LHS(xlimits=Xlim, criterion = 'maximin')

num = 1000
x = sampling(num)

#print(x[10])

df = pd.DataFrame(x, columns=param_names)
#df['RQICRMIN']

# ------ Print the dataframe
df

Unnamed: 0,ALMAVE,VVX,RSWINHF_ICE,FNEBC,RQLCR
0,7.634496,-49.863614,0.708404,9.362725,0.000211
1,29.308568,-36.647492,0.833889,18.309743,0.000247
2,8.359460,-32.459837,0.849512,5.054074,0.000483
3,10.686554,-41.416682,0.884180,14.584483,0.000751
4,26.982178,-47.095902,0.922023,12.755200,0.000892
...,...,...,...,...,...
995,6.693891,-48.212823,0.702129,4.430501,0.000434
996,14.567037,-27.464819,0.817780,8.218017,0.000760
997,25.781998,-36.504986,0.954220,2.724273,0.000200
998,23.780052,-42.957170,0.640838,3.584646,0.000231


In [54]:
# Standardiser les valeurs des paramètres -
# normalize data, remove the mean and divide by the standard deviation :

copy_x = copy(x)
df_standard = pd.DataFrame(copy_x, columns=param_names)
i=0 
N = len(param_names)
while i<N:
    name = param_names[i]
    #print(name)
    mean = df_standard[str(name)].mean() 
    std = df_standard[str(name)].std()
    #print(param[str(name)])
    df_standard[str(name)] = df_standard[str(name)]-mean
    df_standard[str(name)] = df_standard[str(name)]/std
    i=(i+1)
    
df_standard

Unnamed: 0,ALMAVE,VVX,RSWINHF_ICE,FNEBC,RQLCR
0,-0.850020,-1.712192,-0.287983,-0.110319,-1.145816
1,1.651395,0.118123,0.580926,1.438588,-1.014183
2,-0.766352,0.698076,0.689107,-0.856232,-0.151368
3,-0.497781,-0.542367,0.929162,0.793672,0.824692
4,1.382905,-1.328889,1.191201,0.476986,1.339082
...,...,...,...,...,...
995,-0.958576,-1.483572,-0.331436,-0.964185,-0.331375
996,-0.049932,1.389842,0.469377,-0.308490,0.857708
997,1.244392,0.137859,1.414144,-1.259567,-1.185928
998,1.013346,-0.755711,-0.755842,-1.110619,-1.072075


In [55]:
df

Unnamed: 0,ALMAVE,VVX,RSWINHF_ICE,FNEBC,RQLCR
0,7.634496,-49.863614,0.708404,9.362725,0.000211
1,29.308568,-36.647492,0.833889,18.309743,0.000247
2,8.359460,-32.459837,0.849512,5.054074,0.000483
3,10.686554,-41.416682,0.884180,14.584483,0.000751
4,26.982178,-47.095902,0.922023,12.755200,0.000892
...,...,...,...,...,...
995,6.693891,-48.212823,0.702129,4.430501,0.000434
996,14.567037,-27.464819,0.817780,8.218017,0.000760
997,25.781998,-36.504986,0.954220,2.724273,0.000200
998,23.780052,-42.957170,0.640838,3.584646,0.000231


In [56]:
# Save the numpy array in a file
np.save('/data/home/globc/peatier/CNRMppe/PPE/files/npy/ENSEMBLE3_param_standard.npy', df_standard)

In [57]:
# Save the numpy array in a file  
np.save('/data/home/globc/peatier/CNRMppe/PPE/files/npy/ENSEMBLE3_param.npy', df)

In [58]:
# Save the numpy array in a text file
np.savetxt('/data/home/globc/peatier/CNRMppe/PPE/files/txt/ENSEMBLE3_param.txt', df.values)