# Emulating the data for MassiveNus

## Import Packages

In [1]:
import os
import pickle
import sys
import time

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [2]:
from looti import dictlearn as dcl
from looti import datahandle as dhl
from looti import PlottingModule as pm

from looti import tools as too
from looti import PlottingModule as pm

In [3]:
%matplotlib inline
%load_ext autoreload
%autoreload 2
%reload_ext autoreload

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Import Data

The data is imported from pandas DataFrames which have been previously created using the `read_files` module.
The DataFrames contain the **k-grid** and the **power spectrum** at each redshift and each parameter variation, for the *extended model* and the *reference model*.

### Specify Paths

The DataFrames of the *extended model* and the *reference model* should be stored inside the same `data_folder`. The results of the experiments (plots, interpolation functions) are stored inside the `results_folder`.

In [4]:
# Where the experiment outputs (plots, interpolation functions) are written.
results_folder = "./results/"
# Where the DataFrames of the extended and reference models are read from.
data_folder = "../../SimulationData/Massive_Nus/"

In [5]:
too.mkdirp(results_folder)    ## Creates the directory here if it does not already exist

### Load MassiveNus dataframes

The user has **two options** : provide the power **spectra** of the *extended* and *reference model* **OR** provide directly the **ratios** between these two models.

**Case 1** (spectra): user provides :
- datafile_ext : filename containing the power spectra of the extended model
- datafile_ref : filename containing the power spectra of the reference model (i.e. LCDM)

**Case 2** (ratios): user provides :
- datafile_ext : filename containing the ratios between the extended model and the reference model

In [6]:
# File names are given without their extension.
# Reference model (LCDM) input data:
datafile_ref = "Massive_Nus_LCDM"
# Extended model input data:
datafile_ext = "Massive_Nus"

The following functions load the data into an object `emulation_data`. When the ratios are provided directly, the user must indicate `ratio_mode=True`.

In [7]:
# Build the data handle from the extended-model file, the data folder and the
# reference-model file, then load the stored data into it.
emulation_data = dhl.DataHandle(
    datafile_ext,
    data_folder,
    datafile_ref,
    num_parameters=3,
)
emulation_data.read_csv_pandas()

## Calculate power spectra ratios at all redshifts available

Available **redshifts** are stored in the attribute `z_vals`

In [8]:
# Redshifts available in the loaded data (shown as this cell's output)
emulation_data.z_vals

array([0.    , 0.2632, 0.5263, 0.7895, 1.0526, 1.3158, 1.5789, 1.8421,
       2.1053, 2.3684, 2.6316, 2.8947, 3.1579, 3.4211, 3.6842, 3.9474,
       4.2105, 4.4737, 4.7368, 5.    ])

The function `calculate_ratio_by_redshifts` computes the ratio between the *extended* and *reference* model at each **redshift** passed as argument.

The user can optionally decide to **normalize** the data by passing `normalize=True`. This option will force all the ratios to be equal to 1 at k = `pos_norm`.

In [9]:
## normalize=True: the ratios are normalized, i.e. forced to equal 1 at k = pos_norm
## First argument contains all the redshifts at which simulations are available
emulation_data.calculate_ratio_by_redshifts(emulation_data.z_vals,normalize=True)

# Define Parameters

Available parameters are stored in the attribute `emulation_data.extparam_vals`

In [10]:
### Parameter values available in the loaded data (shown as this cell's output)
emulation_data.extparam_vals

array([0.06271, 0.06522, 0.06773, 0.07024, 0.07275, 0.07526, 0.07778,
       0.0803 , 0.08282, 0.08535, 0.08788, 0.09041, 0.09295, 0.0955 ,
       0.09805, 0.1    , 0.10061, 0.10318, 0.10575, 0.10833, 0.11092,
       0.11351, 0.11612, 0.11874, 0.12136, 0.124  , 0.12665, 0.12931,
       0.13198, 0.13467, 0.13737, 0.14008, 0.14281, 0.14556, 0.14832,
       0.1511 , 0.15389, 0.15671, 0.15954, 0.1624 , 0.16527, 0.16817,
       0.17109, 0.17404, 0.17701, 0.18001, 0.18303, 0.18409, 0.18608,
       0.18917, 0.19228, 0.19543, 0.19861, 0.20183, 0.20235, 0.20508,
       0.20837, 0.21171, 0.2118 , 0.21508, 0.21846, 0.21851, 0.22198,
       0.22371, 0.2255 , 0.22808, 0.22907, 0.23187, 0.2327 , 0.23522,
       0.23638, 0.23825, 0.24013, 0.24102, 0.24359, 0.24394, 0.24598,
       0.24782, 0.24823, 0.25036, 0.25177, 0.25238, 0.25432, 0.2558 ,
       0.25616, 0.25794, 0.25966, 0.25991, 0.26132, 0.26292, 0.26411,
       0.26449, 0.26601, 0.26749, 0.2684 , 0.26894, 0.27035, 0.27174,
       0.27278, 0.27

In [11]:
### Train / test split settings
n_splits = 1           # Number of splits
n_test = 1             # Number of test vectors, not counting the extrema
n_train = 102          # Number of training vectors, not counting the extrema
test_indices = [[1]]   # List of lists of indices, one list per split

In [12]:
# Split the data into training and test vectors using the settings defined above.
emulation_data.calculate_data_split(
    n_train=n_train,               # number of training vectors / redshift
    n_test=n_test,                 # number of test vectors
    verbosity=0,
    manual_split=True,
    test_indices=test_indices,
    train_redshift_indices=[0],    # indices of the redshifts used for the train vectors
    test_redshift_indices=[0],     # indices of the redshifts used for the test vectors
)

#  Define Classes (will be in a module)

In [14]:
# Number of PCA components; passed as `ncomp` to dcl.Predict_ratio in the emulation loop
npca = 80

In [15]:
# Container collecting one interpolation function per redshift.
# NOTE(review): Interpolating_function and Interpolating_function_redshift are not
# defined or imported in the cells visible here - they must be in scope before
# this cell is run on a fresh kernel.
Interpolation = Interpolating_function()

# Settings that are identical for every redshift.
predict_ratio_settings = dict(
    Operator="PCA",
    train_noise=1e-3,          # noise for the GP's kernel
    gp_n_rsts=10,              # times allowed to restart the optimiser
    ncomp=npca,                # number of components
    gp_const=1,                # constant for the RBF kernel
    gp_length=1,               # length for GP
    interp_type='GP',          # kind of interpolator, e.g. int1d or GP
    n_splits=1,                # number of splits
    test_indices=test_indices,
    min_k=1e-2,
    max_k=10e1,                # NOTE(review): 10e1 == 100; confirm this (and not 1e1) is intended
    return_interpolator=True,
)

for i, redshift in enumerate(emulation_data.z_requested):
    # Train and test on the i-th redshift only.
    ratios_predicted, emulation_data, interpolation_function = dcl.Predict_ratio(
        emulation_data,
        train_redshift_indices=[i],
        test_redshift_indices=[i],
        **predict_ratio_settings
    )
    function = Interpolating_function_redshift(
        emulation_data, interpolation_function, redshift, normalize=True
    )
    # Keep the redshift and its interpolator at matching positions in the two lists.
    Interpolation.redshift_available.append(redshift)
    Interpolation.list_interpolation_function.append(function)
    

('Shape of PCA matrix: (100, 80)',)
('Number of PCA components: 80',)
('Shape of PCA matrix: (100, 80)',)
('Number of PCA components: 80',)
('Shape of PCA matrix: (100, 80)',)
('Number of PCA components: 80',)
('Shape of PCA matrix: (100, 80)',)
('Number of PCA components: 80',)
('Shape of PCA matrix: (100, 80)',)
('Number of PCA components: 80',)
('Shape of PCA matrix: (100, 80)',)
('Number of PCA components: 80',)
('Shape of PCA matrix: (100, 80)',)
('Number of PCA components: 80',)
('Shape of PCA matrix: (100, 80)',)
('Number of PCA components: 80',)
('Shape of PCA matrix: (100, 80)',)
('Number of PCA components: 80',)
('Shape of PCA matrix: (100, 80)',)
('Number of PCA components: 80',)
('Shape of PCA matrix: (100, 80)',)
('Number of PCA components: 80',)


KeyboardInterrupt: 

# Save and load the interpolation function

In [None]:
# Keep the pickled emulator with the other results (results_folder is created
# near the top of the notebook) rather than in the current working directory.
interpolation_path = os.path.join(results_folder, 'interpolating_function_Massive_Nus')

# Save the collection of per-redshift interpolation functions.
with open(interpolation_path, 'wb') as f:
    pickle.dump(Interpolation, f)

# Read the file back to check the round trip.
# NOTE: pickle.load can execute arbitrary code - only load files you trust.
with open(interpolation_path, 'rb') as f:
    Interpolation_loaded = pickle.load(f)


In [None]:
# Compare the reloaded emulator with the stored data for one parameter set.
test_redshift = 0.0
mnv, om, a_s = 0.06271, 0.38154, 2.2004   # same order as the df_ext index below

# masked_k_grid is raised as a power of 10 (presumably it stores log10(k) - TODO confirm).
k_values = np.power(10, emulation_data.masked_k_grid)

predicted = [Interpolation_loaded.predict(test_redshift, k, [mnv, om, a_s]) for k in k_values]
stored = emulation_data.df_ext.loc["theo", test_redshift, "mnv", mnv, "om", om, "As", a_s].values

fig, ax = plt.subplots()
ax.semilogx(k_values, predicted, label='emulated (loaded interpolator)')
ax.semilogx(k_values, stored, label='input data (df_ext)')
ax.set_xlabel('k')
ax.legend();