# BPZ RAIL 

## Imports

In [None]:
import os
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
%matplotlib inline 

In [None]:
import rail
import qp
import tables_io

from rail.core.data import TableHandle
from rail.core.stage import RailStage
from rail.core.utilStages import ColumnMapper, TableConverter

from rail.creation.degradation import LSSTErrorModel, InvRedshiftIncompleteness, LineConfusion, QuantityCut
from rail.creation.engines.flowEngine import FlowEngine, FlowPosterior

from rail.estimation.algos.bpz_lite import Inform_BPZ_lite, BPZ_lite

from rail.evaluation.evaluator import Evaluator

In [None]:
# Take the data store exposed on RailStage and set `allow_overwrite` on the
# store's class.
# NOTE(review): presumably this lets re-run cells replace entries that already
# exist in the store -- confirm against the RAIL data store documentation.
DS = RailStage.data_store
DS.__class__.allow_overwrite = True

# Working directory; used further down to build the columns-file and PDF-file paths.
curr_dir=os.getcwd()
curr_dir

In [None]:
# Parent of the directory that contains the imported `rail` package.
RAIL_DIR = os.path.join(os.path.dirname(rail.__file__), '..')
# Pickled flow file located under the goldenspike example data of that tree.
flow_file_path = os.path.join(RAIL_DIR, 'examples/goldenspike/data/pretrained_flow.pkl')

print(RAIL_DIR)
# NOTE(review): read_pickle deserializes a pickle; only point this at a flow
# file from a trusted source.
# Show the 'data_columns' entry stored in the pickled object.
pd.read_pickle(flow_file_path)['data_columns']

In [None]:
# Single-letter names of the six photometric bands.
bands = list('ugrizy')

# Band letter -> magnitude column name, e.g. 'u' -> 'mag_u_lsst'.
band_dict = {b: f'mag_{b}_lsst' for b in bands}

# Error-column renaming, e.g. 'mag_u_lsst_err' -> 'mag_err_u_lsst'.
rename_dict = {
    f'mag_{b}_lsst_err': f'mag_err_{b}_lsst'
    for b in bands
}

## Train -> criação de dados fakes (prior)

In [None]:
# Configure the stages of the training-sample pipeline. Nothing is run here;
# the stages are applied in sequence in the next cell.

# Sampler backed by the pickled flow file.
flow_engine_train = FlowEngine.make_stage(
    name='flow_engine_train',
    flow=flow_file_path,
    n_samples=50,
    seed=1235,
)

# Error model configured with the band -> magnitude-column mapping.
lsst_error_model_train = LSSTErrorModel.make_stage(
    name='lsst_error_model_train',
    bandNames=band_dict,
    seed=29,
)

# Degraders applied to the training sample only.
inv_redshift = InvRedshiftIncompleteness.make_stage(
    name='inv_redshift',
    pivot_redshift=1.0,
)
line_confusion = LineConfusion.make_stage(
    name='line_confusion',
    true_wavelen=5007.,
    wrong_wavelen=3727.,
    frac_wrong=0.05,
)
quantity_cut = QuantityCut.make_stage(
    name='quantity_cut',
    cuts={'mag_i_lsst': 25.0},
)

# Rename the error columns, then convert the table to the 'numpyDict' format.
col_remapper_train = ColumnMapper.make_stage(
    name='col_remapper_train',
    columns=rename_dict,
)
table_conv_train = TableConverter.make_stage(
    name='table_conv_train',
    output_format='numpyDict',
)

In [None]:
# Draw the training sample from the flow. Both arguments are positional
# (presumably sample size and seed -- TODO confirm with help(flow_engine_train.sample)).
train_data_orig = flow_engine_train.sample(3500, 1235)
# Apply the stages configured above one after another; each call consumes the
# output of the previous one.
train_data_errs = lsst_error_model_train(train_data_orig, seed=66)
train_data_inc = inv_redshift(train_data_errs)
train_data_conf = line_confusion(train_data_inc)
train_data_cut = quantity_cut(train_data_conf)
train_data_pq = col_remapper_train(train_data_cut)
# Final training data after the table conversion; this is what the inform stage receives.
train_data = table_conv_train(train_data_pq)

In [None]:
# Convert the training data to the tables_io PD_DATAFRAME type for inspection
# and show its first rows.
train_table = tables_io.convertObj(train_data.data, tables_io.types.PD_DATAFRAME)
train_table.head()

## Estimator -> definição de PRIORS

In [None]:
# Configure the BPZ inform stage: input table file, output model file, and the
# prior-related options passed through unchanged.
inform_bpz = Inform_BPZ_lite.make_stage(
    name='inform_bpzlite',
    input='inprogress_output_table_conv_train.hdf5',
    model='trained_BPZ_output.pkl',
    hdf5_groupname='',
    nt_array=[8],
    mmax=26.,
    type_file='',
)
# Show the stage configuration as a plain dict.
inform_bpz.config.to_dict()

In [None]:
%%time
# Run the inform stage on the training data. The %%time cell magic must stay
# on the first line of the cell; it reports how long the cell takes.
inform_bpz.inform(train_data)

## Test -> criação de dados fakes

In [None]:
# Configure the stages of the test-sample pipeline. Unlike the training
# pipeline, no degraders other than the error model are set up here.

# Sampler backed by the same pickled flow file as the training sample.
flow_engine_test = FlowEngine.make_stage(
    name='flow_engine_test',
    flow=flow_file_path,
    n_samples=50,
)

# Error model configured with the band -> magnitude-column mapping.
lsst_error_model_test = LSSTErrorModel.make_stage(
    name='lsst_error_model_test',
    bandNames=band_dict,
)

# Rename the error columns, then convert the table to the 'numpyDict' format.
col_remapper_test = ColumnMapper.make_stage(
    name='col_remapper_test',
    columns=rename_dict,
)
table_conv_test = TableConverter.make_stage(
    name='table_conv_test',
    output_format='numpyDict',
)

In [None]:
# Draw the test sample from the flow. Both arguments are positional
# (presumably sample size and seed -- TODO confirm with help(flow_engine_test.sample)).
test_data_orig = flow_engine_test.sample(3500, 1234)
# Add errors and rename the error columns.
test_data_errs = lsst_error_model_test(test_data_orig, seed=58)
test_data_pq = col_remapper_test(test_data_errs)
# Disabled step kept for reference; `flow_post_test` is not defined in the visible cells.
#test_data_post = flow_post_test.get_posterior(test_data_pq, 'redshift', err_samples=None)
# Final test data after the table conversion; this is what the estimator receives.
test_data = table_conv_test(test_data_pq)

In [None]:
# Convert the test data to the tables_io PD_DATAFRAME type for inspection and
# show its first rows.
test_table = tables_io.convertObj(test_data.data, tables_io.types.PD_DATAFRAME)
test_table.head()

## Posterior -> Handler -> roda o algoritmo


In [None]:
# Columns file, looked up under the working directory.
colum_file = os.path.join(curr_dir, 'configs/test_bpz.columns')

# Configure the BPZ estimator, wiring in the 'model' handle of the inform stage.
estimate_bpz = BPZ_lite.make_stage(
    name='estimate_bpz',
    hdf5_groupname='',
    columns_file=colum_file,
    model=inform_bpz.get_handle('model'),
)

In [None]:
# Run the estimator on the test data. The result is called like a function in
# later cells (`bpz_estimated()`) to get at the underlying data.
bpz_estimated = estimate_bpz.estimate(test_data)

In [None]:
# Uncomment to inspect the object behind the estimate handle:
#help(bpz_estimated())
# Show the tables built from the estimation result.
# NOTE(review): the disabled lines below index the result with 'ancil', so it
# is presumably a dict-like of tables -- confirm.
bpz_estimated().build_tables()

# Disabled: convert the 'ancil' table to a DataFrame for display.
#results_tables = tables_io.convertObj(bpz_estimated().build_tables()['ancil'], tables_io.types.PD_DATAFRAME)
#results_tables

In [None]:
# Configure the evaluator, using the test sample as drawn from the flow (before
# the error model is applied) as the truth.
# The stage name is a plain string: the original f-string had no placeholders.
the_eval = Evaluator.make_stage(name='bpz_eval', truth=test_data_orig)
# Debug output: show both handles that are passed to the evaluator.
print(bpz_estimated, test_data_orig)
# Evaluate the estimate against the truth sample.
result_dict = the_eval.evaluate(bpz_estimated, test_data_orig)

In [None]:
# Convert the evaluation result to the tables_io PD_DATAFRAME type and show its
# first rows.
results_tables = tables_io.convertObj(result_dict.data, tables_io.types.PD_DATAFRAME)
results_tables.head()

## Resultado pz x spec-z

In [None]:
# 'zmode' entry of the estimate's ancillary data; plotted below as the bpz photo-z.
zmode = bpz_estimated().ancil['zmode']

In [None]:
# Scatter of the photo-z mode against the true redshift of the test sample.
plt.figure(figsize=(8, 8))
plt.scatter(test_data()['redshift'], zmode, s=1, c='k', label='simple bpz mode')
# Dashed red one-to-one line as a visual reference.
plt.plot([0, 3], [0, 3], 'r--')
plt.xlabel("true redshift")
plt.ylabel("bpz photo-z")
# The scatter carries a label, so draw the legend that displays it. The markers
# are enlarged in the legend because s=1 points are too small to see there.
# The trailing semicolon stops the notebook from echoing the return value.
plt.legend(loc='upper left', markerscale=8);

## PIT COM PROBLEMAS

In [None]:
# NOTE(review): the wildcard import hides which names come from the pit module;
# presumably PIT (used here) and PITOutRate (used in the next cell) are among
# them. TableHandle is already imported at the top of the file.
from rail.evaluation.metrics.pit import *
from rail.core.data import QPHandle, TableHandle

# PDF file looked up in the working directory (presumably written by the
# 'estimate_bpz' stage -- confirm the output location).
pdfs_file =  os.path.join(curr_dir, "output_estimate_bpz.hdf5")

# True redshifts are taken from the test sample as drawn from the flow.
ztrue = test_data_orig()['redshift']
# Read the PDF file through the data store under the key 'pdfs_data'.
fzdata = DS.read_file('pdfs_data', QPHandle, pdfs_file)

# Build the PIT object from the loaded PDFs and the true redshifts, evaluate
# it, and print both returned values.
pitobj = PIT(fzdata(), ztrue)
quant_ens, metamets = pitobj.evaluate()
print(quant_ens, metamets)

In [None]:
# NOTE(review): this reads the private attribute `_pit_samps` of the PIT
# object; prefer a public accessor if the class offers one.
pit_vals = np.array(pitobj._pit_samps)
print(pit_vals)

# Outlier rate computed from the PIT values and the first value returned by
# pitobj.evaluate() above.
pit_out_rate = PITOutRate(pit_vals, quant_ens).evaluate()
print(f"PIT outlier rate of this sample: {pit_out_rate}") 

In [None]:
# Index of the galaxy whose individual PDF is inspected.
galid = 1500

# Redshift grid: 301 evenly spaced points covering [0, 3].
zgrid = np.linspace(0, 3., 301)

# Evaluate the PDF of the selected galaxy once and reuse it for both the
# printout and the plot (the original evaluated it twice on the same grid).
gal_pdf = bpz_estimated()[galid].pdf(zgrid)
print(gal_pdf)
# Drop length-1 axes so the values can be plotted against zgrid.
single_gal = np.squeeze(gal_pdf)

# Point estimate and true redshift of the same galaxy.
single_zmode = zmode[galid]
truez = test_data()['redshift'][galid]

# PDF curve with the mode (dashed) and the true redshift (red) marked.
plt.plot(zgrid, single_gal, color='k', label='single pdf')
plt.axvline(single_zmode, color='k', ls='--', label='mode')
plt.axvline(truez, color='r', label='true redshift')
plt.legend(loc='upper right')
plt.xlabel("redshift")
plt.ylabel("p(z)")