# 

<img align="left" src = https://linea.org.br/wp-content/themes/LIneA/imagens/logo-header.jpg width=100 style="padding: 20px"> 

<img align="left" src = https://project.lsst.org/sites/default/files/Rubin-O-Logo_0.png width=160 style="padding: 20px">  

# First Try of executing RAIL using DP0.2 - FAILED

**Contact author**: Heloisa da Silva Mengisztki ([heloisasmengisztki@gmail.com](mailto:heloisasmengisztki@gmail.com)) 

**Last verified run**: 2022-12-01 (YYYY-MM-DD) <br><br><br>

This notebook is an incomplete first attempt at running rail_bpz on the DP0.2 data.

### IMPORTS

In [None]:
import time
import os
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from rail.core.utils import RAILDIR
from lsst.rsp import get_tap_service

%matplotlib inline 

In [None]:
import rail
import qp
import tables_io

from rail.core.data import TableHandle
from rail.core.stage import RailStage
from rail.core.utilStages import ColumnMapper, TableConverter

##from rail.creation.engines.flowEngine import FlowEngine, FlowPosterior

from rail.estimation.algos.bpz_lite import Inform_BPZ_lite, BPZ_lite

from rail.evaluation.evaluator import Evaluator

# Print the package help for `rail`; used here as a quick way to check what is installed.
help(rail)

In [None]:
# Obtain a TAP service client through the Rubin Science Platform helper.
service = get_tap_service()

# Sanity checks: a service object was returned and it points at the expected TAP endpoint.
assert service is not None
assert service.baseurl == "https://data.lsst.cloud/api/tap"

## General Configs

In [None]:
# Show at most 20 rows when a DataFrame is displayed in the notebook.
pd.set_option('display.max_rows', 20)

In [None]:
# Working directory of the notebook; used later to build the path to the BPZ columns file.
CURR_DIR = os.getcwd()
# Display both directories for reference.
CURR_DIR, RAILDIR

## Reading DP0.2 data

In [None]:
# Maximum number of rows requested from the TAP service (passed as `maxrec` to the search).
max_rec = 1000
# Centre of the cone search, interpolated into the ADQL CIRCLE as "ra, dec"
# (presumably in degrees -- confirm against the ADQL/TAP documentation).
use_center_coords = "62, -37"
# Radius of the cone search, interpolated into the ADQL CIRCLE (presumably degrees -- confirm).
use_radius = "1.0"

In [None]:
# Photometric bands to request, in the order their columns appear in the SELECT list.
bands = ['g', 'i', 'r', 'u', 'y', 'z']

# One SELECT fragment per band: the cModel flux converted to an AB magnitude
# (aliased mag_<band>_cModel) followed by the raw cModel flux-error column.
# Every fragment ends with ", " so the fragments can simply be concatenated.
mags = "".join(
    f"scisql_nanojanskyToAbMag({band}_cModelFlux) AS mag_{band}_cModel, {band}_cModelFluxErr, "
    for band in bands
)

# Complete column list; the trailing space separates it from the FROM clause.
columns_query = "objectId, " + mags + "coord_ra, coord_dec "

In [None]:
# Assemble the ADQL query: a cone search on the DP0.2 Object table restricted to
# primary detections flagged as extended in r, with 17 < r-band cModel magnitude < 23.
# `columns_query` already ends with a space, so it joins directly onto the FROM clause.
query = (
    f"SELECT {columns_query}"
    "FROM dp02_dc2_catalogs.Object "
    f"WHERE CONTAINS(POINT('ICRS', coord_ra, coord_dec), CIRCLE('ICRS', {use_center_coords}, {use_radius})) = 1 "
    "AND detect_isPrimary = 1 "
    "AND r_extendedness = 1 "
    "AND scisql_nanojanskyToAbMag(r_cModelFlux) > 17.0 "
    "AND scisql_nanojanskyToAbMag(r_cModelFlux) < 23.0 "
)
print(query)

In [None]:
%%time
# Run the query against the TAP service, capped at `max_rec` rows.
results = service.search(query, maxrec=max_rec)
print(type(results))
# Rebind `results` to its table form, then convert that table to a pandas DataFrame.
results = results.to_table()
print(type(results))
results_pd = results.to_pandas()
# Summarise columns, dtypes and memory usage of the DataFrame.
results_pd.info(memory_usage="deep")

In [None]:
# Preview the first rows of the query result.
results_pd.head()

---

##  RAIL BPZ

In [None]:
# Shortcut to the data store exposed on RailStage.
DS = RailStage.data_store
# The flag is set on the store's class rather than on the instance; presumably this
# allows existing entries to be overwritten when cells are re-run -- confirm in RAIL docs.
DS.__class__.allow_overwrite = True

In [None]:
# Prepare the DP0.2 query result for RAIL/BPZ:
#   1. convert the cModel flux errors (nJy) into magnitude errors,
#   2. rename the DP0.2 columns to the `*_lsst` names used downstream,
#   3. convert the table to a numpy dictionary.
#
# Two defects of the earlier version are fixed here:
#   * the i and r bands were cross-mapped (mag_i_cModel -> mag_r_lsst and vice versa);
#   * `<band>_cModelFluxErr` (a flux error) was renamed to `mag_err_<band>_lsst`
#     without being converted to a magnitude error.

LSST_BANDS = ("u", "g", "r", "i", "z", "y")

# scisql_nanojanskyToAbMag(f) = -2.5 * log10(f) + 31.4, so the flux in nJy can be
# recovered from the AB magnitude returned by the query.
AB_ZEROPOINT_NJY = 31.4
# First-order error propagation: sigma_mag = (2.5 / ln 10) * sigma_flux / flux.
MAG_ERR_FACTOR = 2.5 / np.log(10)

# Work on a copy so that re-running this cell never modifies `results_pd`.
photometry = results_pd.copy()
for lsst_band in LSST_BANDS:
    flux_njy = 10.0 ** ((AB_ZEROPOINT_NJY - photometry[f"mag_{lsst_band}_cModel"]) / 2.5)
    # NaN magnitudes (non-positive fluxes) propagate to NaN magnitude errors.
    photometry[f"magerr_{lsst_band}_cModel"] = (
        MAG_ERR_FACTOR * photometry[f"{lsst_band}_cModelFluxErr"] / flux_njy
    )
# The raw flux errors are no longer needed once the magnitude errors exist.
photometry = photometry.drop(columns=[f"{lsst_band}_cModelFluxErr" for lsst_band in LSST_BANDS])

# DP0.2 column name -> RAIL column name. Every band maps onto itself.
# `detect_isPrimary` is not selected by the query in this notebook; the identity
# entry is kept so the mapping still applies if that column is added to the query.
columns_remmap = {
    "objectId": "id",
    "coord_ra": "coord_ra",
    "coord_dec": "coord_dec",
    "detect_isPrimary": "detect_isPrimary",
}
for lsst_band in LSST_BANDS:
    columns_remmap[f"mag_{lsst_band}_cModel"] = f"mag_{lsst_band}_lsst"
    columns_remmap[f"magerr_{lsst_band}_cModel"] = f"mag_err_{lsst_band}_lsst"

col_remapper_train = ColumnMapper.make_stage(name='col_remapper_train', columns=columns_remmap)
table_conv_train = TableConverter.make_stage(name='table_conv_train', output_format='numpyDict')

results_remmaped = col_remapper_train(photometry)
# A redshift column is required downstream. DP0.2 as queried here has none, so a
# constant placeholder is used; real values would have to come from other surveys.
results_remmaped.data["redshift"] = 1

train_data = table_conv_train(results_remmaped)

As we can see, a ceci stage is basically configured with a name and a few options, so that when the stage runs it returns a TableHandle, such as a PqHandle, Hdf5Handle or FitsHandle.

Note: for machine learning algorithms it may be necessary to configure a flow handle too.

In [None]:
# Show the types of the objects returned by the two stages.
type(results_remmaped), type(train_data)

In [None]:
# Display the data store to see what the stages have registered so far.
DS

In [None]:
# Convert the training data back to a pandas DataFrame to inspect the remapped columns.
test_table = tables_io.convertObj(train_data.data, tables_io.types.PD_DATAFRAME)
test_table.head()

At this point we would need redshift values for these objects, obtained from other surveys.

### PRIORS - Inform

In [None]:
# Path to the BPZ columns configuration file, resolved relative to the notebook's working directory.
columns_file = os.path.join(CURR_DIR, '../configs/bpz.columns')
# Configure the BPZ "inform" stage (the PRIORS step of this notebook).
inform_bpz = Inform_BPZ_lite.make_stage(
    name='inform_bpzlite', 
    input="inprogress_output_table_conv_train.hdf5", 
    model='trained_BPZ_output.pkl', ## creates the training file to be used later
    hdf5_groupname='', 
    columns_file=columns_file
)

In [None]:
%%time
# Run the inform stage on the converted training data.
returned = inform_bpz.inform(train_data)

___

## Posterior -> Estimate


In [None]:
# Configure the BPZ estimation stage, reusing the same columns file and the
# model handle exposed by the inform stage.
estimate_bpz = BPZ_lite.make_stage(
    name='estimate_bpz', 
    hdf5_groupname='', 
    columns_file=columns_file, 
    model=inform_bpz.get_handle('model'))

In [None]:
# Run the estimation on the same table that was passed to the inform stage.
bpz_estimated = estimate_bpz.estimate(train_data)

In [None]:
# Calling the handle returns the estimated data; build its tables once and reuse
# the result (previously the tables were built twice and the first result discarded).
bpz_tables = bpz_estimated().build_tables()

# Show the ancillary ('ancil') table as a pandas DataFrame.
results_tables = tables_io.convertObj(bpz_tables['ancil'], tables_io.types.PD_DATAFRAME)
results_tables

In [None]:
# The remapped catalogue (including its placeholder redshift column) is used as the truth table.
test_data_orig = results_remmaped.data

# Plain string literal: the stage name has no placeholders, so no f-string is needed.
evaluator = Evaluator.make_stage(name='bpz_eval', truth=test_data_orig)
result_dict = evaluator.evaluate(bpz_estimated, test_data_orig)

In [None]:
# Convert the evaluation output to a pandas DataFrame and preview it.
# Note: this rebinds `results_tables`, replacing the table built from the estimate.
results_tables = tables_io.convertObj(result_dict.data, tables_io.types.PD_DATAFRAME)
results_tables.head()

___
## Result: photo-z vs. spec-z

In [None]:
# Per-object 'zmode' values from the estimate's ancillary data (plotted below as the BPZ mode).
zmode = bpz_estimated().ancil['zmode']

In [None]:
# Scatter the BPZ mode against the 'redshift' column of the training table.
fig, ax = plt.subplots(figsize=(8, 8))
ax.scatter(train_data()['redshift'], zmode, s=1, c='k', label='simple bpz mode')
# Dashed red one-to-one reference line from (0, 0) to (3, 3).
ax.plot([0, 3], [0, 3], 'r--')
ax.set_xlabel("true redshift")
ax.set_ylabel("bpz photo-z")

## Conclusion

The results obtained are as expected: BPZ needs either an already trained model file or a training step, and since we are using DP0.2 we do not have a training set. To build one we would need a redshift column. It is nevertheless still possible to run the algorithm with an untrained set.