In [None]:
# usual imports
import os
import numpy as np
import matplotlib.pyplot as plt
from rail.utils.path_utils import RAILDIR
#from rail.estimation.algos.bpz_version.utils import RAIL_BPZ_DIR
from rail.pipelines.estimation.estimate_all import EstimatePipeline
from rail.core import common_params
from rail.pipelines.utils.name_factory import NameFactory, DataType, CatalogType, ModelType, PdfType
import qp
import ceci


In [None]:
# Change this placeholder to the rail_bpz source directory of your own RAIL installation.
# NOTE(review): RAIL_BPZ_DIR is not referenced by any other cell shown in this notebook — confirm it is still needed.
RAIL_BPZ_DIR = "/path/to/your/rail/rail_bpz/src/"

### Define the bands, magnitude limits, and put them into a dictionary

In [None]:
# Band letters and the magnitude limit adopted for each one
# (the limits are listed in the same order as the band letters).
bands = 'grizy'
maglims = [27.66, 27.25, 26.6, 26.24, 25.35]

# Map every `HSC<band>_cmodel_dered` column name to the limit of its band.
maglim_dict = {
    f"HSC{band_letter}_cmodel_dered": limit
    for band_letter, limit in zip(bands, maglims)
}

### Set the parameters common to all stages of the estimation



In [None]:
# Column names of the magnitudes and of their uncertainties, one entry per band.
hsc_mag_columns = [f'HSC{band}_cmodel_dered' for band in bands]
hsc_magerr_columns = [f'{band}_cmodel_magerr' for band in bands]

# Register the defaults shared by the stages of the pipeline.
common_params.set_param_defaults(
    bands=hsc_mag_columns,
    err_bands=hsc_magerr_columns,
    nondetect_val=np.nan,
    ref_band='HSCi_cmodel_dered',
    redshift_col='specz_redshift',
    mag_limits=maglim_dict,
    zmax=6.0,  # set zmax to 6 for HSC
)

### Set up the estimate pipeline

In [None]:
# Create the estimation pipeline; its stages are configured and initialized in the cells below.
pipe = EstimatePipeline()

In [None]:
# Name factory; used below (get_data_dir) to build the paths of the estimator model files.
namer = NameFactory()

### Additional config update telling the summarizers where to find the spectroscopic redshift column

In [None]:
# Both SOM-based stages receive the same two settings: an empty group name
# for the spectroscopic table and the name of its redshift column.
spec_overrides = dict(spec_groupname="", redshift_colname='specz_redshift')
for summarizer_stage in (pipe.estimate_somoclu, pipe.estimate_simplesom):
    summarizer_stage.config.update(**spec_overrides)


### Specify the input model for each stage, and the spec_input file (the same file as input_file)

In [None]:
# Catalog the pipeline runs on; the same file is also passed as the spectroscopic input.
input_file = 'dered_223501_sz_match_pdr3_dud_NONDET.hdf5'


def _estimator_model_path(filename):
    """Return the path of `filename` inside the estimator-model data directory."""
    model_dir = namer.get_data_dir(DataType.model, ModelType.estimator)
    return os.path.join(model_dir, filename)


# Pipeline input tag -> name of the corresponding model file.
_model_filenames = {
    'model_knn': "model_knn.pkl",
    'model_simplenn': "model_simplenn.pkl",
    'model_simplesom': "model_simplesom.pkl",
    'model_somoclu': "model_somoclu.pkl",
    'model_fzboost': "model_FZBoost.hdf5",  # the file is named FZBoost, not fzboost
    'model_trainz': "model_trainz.pkl",
}

# Resolve every model file to its full path, then append the two catalog inputs.
input_dict = {
    tag: _estimator_model_path(filename)
    for tag, filename in _model_filenames.items()
}
input_dict['input'] = input_file
input_dict['spec_input'] = input_file

In [None]:
# Run-level settings: outputs and logs go to the current directory, and the
# resume flag is switched on.
run_config = dict(output_dir='.', log_dir='.', resume=True)

# The third positional argument is left as None.
pipe_info = pipe.initialize(input_dict, run_config, None)


### Save the pipeline to a yaml file

In [None]:
# Write the pipeline to a YAML file; the same file is read back with ceci in a later cell.
pipe.save('tmp_estimate_all.yml')

[For NERSC users!]

Running the pipeline from this notebook won't work on jupyter.nersc.gov. For a test run, you need to:
1. Add "name: local" to the "site" section of 'tmp_estimate_all.yml'.
2. ssh into perlmutter.nersc.gov, set up the rail environment, and run "ceci tmp_estimate_all.yml".

In [None]:
# ceci is already imported in the first cell; repeating the import here is harmless.
import ceci
# Load the pipeline back from the YAML file written by pipe.save(...) above.
pr = ceci.Pipeline.read('tmp_estimate_all.yml')

In [None]:
# Run the pipeline that was read from 'tmp_estimate_all.yml'.
pr.run()

## Display the $p(z)$ point estimates of the estimators

### starting with simple NN

In [None]:
# Path of the simple NN output file, read below with qp.read.
infile = './pdf/pz/output_simplenn.hdf5'

qp_ensemble_simplenn = qp.read(infile)


In [None]:
# Histogram (40 bins) of the 'zmode' values stored in the simple NN ensemble's ancillary data.
fig, ax = plt.subplots()
ax.hist(qp_ensemble_simplenn.ancil['zmode'], bins=40)
ax.set_xlabel('z')
plt.show()

### Now read the spectroscopic data

In [None]:
import tables_io

In [None]:
# This is the same file name as `input_file`, which was passed to the pipeline
# as both `input` and `spec_input`; keep the two in sync if either changes.
sz_data = tables_io.read("dered_223501_sz_match_pdr3_dud_NONDET.hdf5")

In [None]:
# Spectroscopic redshift column; the same column name is given as `redshift_col` in the common parameters.
specz = sz_data['specz_redshift']

Compare the mode of the photo-z PDF with the spectroscopic redshift

In [None]:
# Simple NN photo-z mode plotted against the spectroscopic redshift.
plt.figure(figsize=(6, 6))
plt.scatter(specz, qp_ensemble_simplenn.ancil['zmode'], s=0.01)
plt.xlabel('Spec z')
plt.ylabel('Simple NN Mode')
plt.xlim(0, 3)
plt.ylim(0, 3)
# One-to-one reference line; it extends beyond the axis limits so it spans the whole panel.
plt.plot([-1, 10], [-1, 10], '--', color='black')
# Render explicitly, as the histogram cell does, so the cell does not echo
# the list of Line2D objects returned by plt.plot as its output.
plt.show()

### Do the same for FlexZBoost

In [None]:
# Path of the FZBoost output file, read below with qp.read.
infile = './pdf/pz/output_FZBoost.hdf5'

qp_ensemble_flexzboost = qp.read(infile)

# Uncomment to inspect the ensemble that was just read:
# print(qp_ensemble_flexzboost.npdf)
# print(qp_ensemble_flexzboost.ancil)

In [None]:
# FlexZBoost photo-z mode plotted against the spectroscopic redshift.
plt.figure(figsize=(6, 6))
plt.scatter(specz, qp_ensemble_flexzboost.ancil['zmode'], s=0.01)
plt.xlabel('Spec z')
plt.ylabel('Flex z boost Mode')
plt.xlim(0, 3)
plt.ylim(0, 3)
# One-to-one reference line; it extends beyond the axis limits so it spans the whole panel.
plt.plot([-1, 10], [-1, 10], '--', color='black')
# Render explicitly, as the histogram cell does, so the cell does not echo
# the list of Line2D objects returned by plt.plot as its output.
plt.show()

## Display $n(z)$

In [None]:
# Read the single n(z) output files of the two SOM summarizers directly with qp.

qp_single_nz_som = qp.read('./single_NZ_estimate_simplesom.hdf5')
qp_single_nz_somoclu = qp.read('./single_NZ_estimate_somoclu.hdf5')

In [None]:
# Evaluate both n(z) estimates on a common grid of 600 points over 0 <= z <= 6
# (the upper edge equals the zmax=6.0 given to the common parameters).
z_grid = np.linspace(0,6,600)
nz_som_grid = qp_single_nz_som.pdf(z_grid)
nz_somoclu_grid = qp_single_nz_somoclu.pdf(z_grid)


In [None]:
# Some of the spectroscopic samples failed and have z=-99; exclude them by keeping only z > 0.
specz_good = specz[specz>0.0]

### Make a plot to compare the two summarizers and the true n(z)

In [None]:
# Overlay the two summarizer n(z) estimates on the normalized spec-z histogram.
plt.figure()
# [0] selects the first row of each evaluated grid — presumably the single
# n(z) stored in each file; TODO confirm.
plt.plot(z_grid, nz_som_grid[0], label='Simple SOM')
plt.plot(z_grid, nz_somoclu_grid[0], label='Somoclu')
# density=True normalizes the histogram so it is comparable with the pdf curves.
plt.hist(specz_good, density=True, bins=600, histtype='step', label='True (Spec-z)')
plt.xlim(-0.1, 3)
plt.ylim(0, 1.2)
plt.xlabel('z')
plt.ylabel('n(z)')
plt.legend()
# Render explicitly, as the histogram cell earlier in the notebook does, so the
# cell does not echo the Legend object returned by plt.legend as its output.
plt.show()