In [None]:
# usual imports
import os
import numpy as np
import matplotlib.pyplot as plt
#from rail.estimation.algos.bpz_version.utils import RAIL_BPZ_DIR
from rail.pipelines.estimation.estimate_all import EstimatePipeline
from rail.core import common_params
from rail.utils import catalog_utils
import qp
import ceci


### Pick the relevant set of parameter names, etc

In [None]:
# Catalog flavour used throughout the notebook. The path-selection cells below
# have branches for 'hsc', 'dc2' and 'roman_rubin'.
catalog_tag = 'hsc'

### Set the parameters common to all stages of the estimation

In [None]:
# Apply the defaults registered for `catalog_tag` through rail's catalog_utils.
# NOTE(review): presumably this sets the shared band/column names used by every
# stage (see rail.core.common_params) -- confirm against the rail documentation.
catalog_utils.apply_defaults(catalog_tag)

### Set up the estimate pipeline

In [None]:
# Build the estimation pipeline without passing any overrides.
pipe = EstimatePipeline()

### Specify the location of your data files 

In [None]:
# Select the directory holding the input catalogs for the chosen `catalog_tag`.
# `truth_dir` is only defined for 'roman_rubin', the one case where the
# spectroscopic/truth file is read from a separate directory.
if catalog_tag == 'hsc':
    data_dir = '/global/cfs/cdirs/lsst/groups/PZ/HSCDATA/SPECTROSCOPY'
elif catalog_tag == 'dc2':
    data_dir = '/global/cfs/cdirs/lsst/groups/PZ/PhotoZDC2/run2.2i_dr6_v2/'
elif catalog_tag == 'roman_rubin':
    data_dir = '/sdf/data/rubin/shared/pz/data/roman_rubin_2023_v1.1.3_parquet_healpixel_maglim_25.5_truth_to_observed/10050/'
    truth_dir = '/sdf/data/rubin/shared/pz/data/roman_rubin_2023_v1.1.3_parquet_healpixel_maglim_25.5/10050/'
else:
    # Fail here with a clear message instead of leaving `data_dir` undefined,
    # which would only surface as a NameError in a later cell.
    raise ValueError(
        f"No data directory configured for catalog_tag={catalog_tag!r}; "
        "expected one of 'hsc', 'dc2', 'roman_rubin'."
    )

In [None]:
# Resolve the photometric input file (fed to the pipeline) and the
# spectroscopic reference file for the chosen `catalog_tag`.
if catalog_tag == 'hsc':
    photo_input_file = os.path.join(data_dir, 'dered_223501_sz_match_pdr3_dud_NONDET.hdf5')
    # For HSC the same matched catalog serves as the spectroscopic reference.
    spec_input_file = photo_input_file
elif catalog_tag == 'dc2':
    photo_input_file = os.path.join(data_dir, "Run2.2i_dr6_dereddened_tract_4852.hdf5")
    spec_input_file = os.path.join(data_dir, "Run2.2i_dr6_dereddened_tract_4437_test_bright.hdf5")
elif catalog_tag == 'roman_rubin':
    photo_input_file = os.path.join(data_dir, "output.hdf5")
    spec_input_file = os.path.join(truth_dir, "part-0.parquet")
else:
    # Fail here with a clear message instead of leaving the file names
    # undefined, which would only surface as a NameError in a later cell.
    raise ValueError(
        f"No input files configured for catalog_tag={catalog_tag!r}; "
        "expected one of 'hsc', 'dc2', 'roman_rubin'."
    )

# Pipeline outputs are written under projects/<catalog_tag>.
root_dir = os.path.join('projects', catalog_tag)

### Grab the default input file name dictionary and update it as desired

It already specifies the input model for each stage.  We add the data input file.

In [None]:
# Display the pipeline's default input dictionary for inspection.
pipe.default_input_dict

In [None]:
# Start from a copy of the defaults so the pipeline's own dictionary is left
# untouched, then point the 'input' entry at the photometric catalog.
input_dict = pipe.default_input_dict.copy()
input_dict['input'] = photo_input_file

In [None]:
# Run configuration: outputs go under `root_dir`, logs in the current
# directory, and `resume` is switched on.
run_config = {
    'output_dir': root_dir,
    'log_dir': '.',
    'resume': True,
}
pipe_info = pipe.initialize(input_dict, run_config, None)

### Additional config update to specify the place to find the spectroscopic columns for the summarizer

### Save the pipeline to a yaml file

In [None]:
# Save the pipeline to 'estimate_all.yml' in the current working directory;
# the same file name is read back with ceci further down.
pipe.save('estimate_all.yml')

[For NERSC users!]

The pipeline cannot be run from jupyter.nersc.gov. For a test run, you need to:
1. Add "name: local" to the "site" section in the 'estimate_all.yml'
2. ssh into perlmutter.nersc.gov, set up the rail environment, and run "ceci estimate_all.yml"

In [None]:
# `ceci` is already imported in the first cell; the repeated import is harmless.
import ceci
# Load the pipeline definition back from the yaml file saved above.
pr = ceci.Pipeline.read('estimate_all.yml')

In [None]:
# Execute the pipeline that was loaded from 'estimate_all.yml'.
pr.run()

# display $p(z)$ of the point estimators

### starting with simple NN

In [None]:
# Output of the simple-NN estimator, read as a qp ensemble.
# NOTE(review): the pipeline was initialized with output_dir=root_dir; confirm
# that this path, relative to the working directory, is where the run actually
# wrote its output.
infile = './pdf/pz/output_simplenn.hdf5'

qp_ensemble_simplenn = qp.read(infile)


In [None]:
# Histogram of the simple-NN point estimates (PDF modes).
fig, ax = plt.subplots()
ax.hist(qp_ensemble_simplenn.ancil['zmode'], bins=40)
ax.set_xlabel('z')
plt.show()

### Now read the spectroscopic data

In [None]:
import tables_io

In [None]:
# Read the spectroscopic catalog from the path resolved earlier in the notebook
# (`spec_input_file`) instead of a bare file name, which would only work if the
# file had been copied into the current working directory.
sz_data = tables_io.read(spec_input_file)

In [None]:
# Pull the 'specz_redshift' column out of the spectroscopic table.
specz = sz_data['specz_redshift']

Compare the mode of the photo-z PDF with the spectroscopic redshift

In [None]:
# Photo-z mode (simple NN) against spectroscopic redshift, with the 1:1 line.
fig, ax = plt.subplots(figsize=(6, 6))
ax.scatter(specz, qp_ensemble_simplenn.ancil['zmode'], s=0.01)
ax.set_xlabel('Spec z')
ax.set_ylabel('Simple NN Mode')
ax.set_xlim(0, 3)
ax.set_ylim(0, 3)
ax.plot([-1, 10], [-1, 10], '--', color='black')

### Do the same for FlexZBoost

In [None]:
# Output of the FlexZBoost estimator, read as a qp ensemble.
# NOTE(review): as with the simple-NN file, confirm this relative path matches
# the pipeline's configured output directory.
infile = './pdf/pz/output_FZBoost.hdf5'

qp_ensemble_flexzboost = qp.read(infile)

# Uncomment to inspect the ensemble:
# print(qp_ensemble_flexzboost.npdf)
# print(qp_ensemble_flexzboost.ancil)

In [None]:
# Photo-z mode (FlexZBoost) against spectroscopic redshift, with the 1:1 line.
fig, ax = plt.subplots(figsize=(6, 6))
ax.scatter(specz, qp_ensemble_flexzboost.ancil['zmode'], s=0.01)
ax.set_xlabel('Spec z')
ax.set_ylabel('Flex z boost Mode')
ax.set_xlim(0, 3)
ax.set_ylim(0, 3)
ax.plot([-1, 10], [-1, 10], '--', color='black')

## display $n(z)$

In [None]:
# directly reading the hdf5 files with qp

# Read the n(z) estimates of the two SOM-based summarizers directly with qp.
# NOTE(review): paths are relative to the working directory -- confirm they
# match where the pipeline wrote these files.
qp_single_nz_som = qp.read('./single_NZ_estimate_simplesom.hdf5')
qp_single_nz_somoclu = qp.read('./single_NZ_estimate_somoclu.hdf5')

In [None]:
# Evaluate both n(z) estimates on a common redshift grid: 600 points on [0, 6].
z_grid = np.linspace(0.0, 6.0, num=600)
nz_som_grid = qp_single_nz_som.pdf(z_grid)
nz_somoclu_grid = qp_single_nz_somoclu.pdf(z_grid)


In [None]:
# Some of the spectroscopic measurements failed and are flagged with z = -99;
# keep only the objects with a positive redshift.
specz_good = specz[specz>0.0]

### Make a plot to compare the two summarizers and the true n(z)

In [None]:
# Overlay the two summarizer n(z) estimates on the normalized spec-z histogram.
fig, ax = plt.subplots()
ax.plot(z_grid, nz_som_grid[0], label='Simple SOM')
ax.plot(z_grid, nz_somoclu_grid[0], label='Somoclu')
ax.hist(specz_good, density=True, bins=600, histtype='step', label='True (Spec-z)')
ax.set_xlim(-0.1, 3)
ax.set_ylim(0, 1.2)
ax.set_xlabel('z')
ax.set_ylabel('n(z)')
ax.legend()