# Run TPZ lite

_Autores: Andreia Dourado, Bruno Moraes_

_Adaptado do notebook de exemplo feito por Sam Schmidt: <https://github.com/LSSTDESC/rail_tpz>._

__Descrição: Passo a passo para rodar as etapas de treino (inform) e teste (estimate) do TPZ utilizando o RAIL.__


### 1. Importando as bibliotecas:

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import rail
import qp
from rail.core.data import TableHandle, PqHandle, ModelHandle, QPHandle, DataHandle, Hdf5Handle
from rail.core.data import TableHandle
from rail.core.stage import RailStage
import os
import tables_io

from rail.estimation.algos.tpz_lite import TPZliteInformer
from rail.estimation.algos.tpz_lite import TPZliteEstimator

In [None]:
# Shared RAIL data store; the read_file calls in this notebook go through it.
DS = RailStage.data_store
# Set allow_overwrite on the store's class. Presumably this lets re-run cells
# replace entries that already exist in the store — confirm against the RAIL docs.
DS.__class__.allow_overwrite = True

### 2. Leitura dos arquivos:

#### 2.1 Arquivo de treino:

In [None]:
# Path of the HDF5 file that holds the training sample.
train_file = '/lustre/t0/scratch/users/andreia.dourado/TCC/dp02/object/runs/5sigma/train_file_dp02_object_5sigma.hdf5'

In [None]:
# Read the training file through the data store, registered under the key
# "training_data" as a TableHandle.
training_data = DS.read_file("training_data", TableHandle, train_file)

In [None]:
# Show the contents of the loaded training table.
print(training_data.data)

In [None]:
# Number of entries in the g-band magnitude column of the 'photometry' group.
len(training_data.data['photometry']['mag_g'])

#### 2.2 Arquivo de teste:

In [None]:
# Path of the HDF5 file that holds the test sample.
testfile =  '/lustre/t0/scratch/users/andreia.dourado/TCC/dp02/object/runs/5sigma/test_file_dp02_object_5sigma.hdf5'

In [None]:
# Read the test file through the data store, registered under the key
# "test_data" as a TableHandle.
test_data = DS.read_file("test_data", TableHandle, testfile)

In [None]:
# Show the contents of the loaded test table.
print(test_data.data)

In [None]:
# Number of entries in the g-band magnitude column of the 'photometry' group.
len(test_data.data['photometry']['mag_g'])

### 3. Run TPz

#### 3.1 Parâmetros de configuração

__Atributos, erros de magnitudes e magnitudes limites:__

In [None]:
# Build the per-band settings from the six magnitude columns of the
# training table.
bands = ["u", "g", "r", "i", "z", "y"]
train_atts = []      # magnitude column names used as training attributes
new_err_dict = {}    # attribute column -> name of its error column
new_mag_limits = {}  # magnitude column -> largest value found in the training set
for band in bands:
    mag_col = f"mag_{band}"
    train_atts.append(mag_col)
    new_err_dict[mag_col] = f"magerr_{band}"
    new_mag_limits[mag_col] = max(training_data.data['photometry'][mag_col])
# Redshift is also an attribute used in the training, but it has no
# associated error column, so its entry in the error dictionary is None.
new_err_dict["redshift"] = None

# Print the three structures for a quick visual check.
for summary in (new_err_dict, train_atts, new_mag_limits):
    print(summary)

In [None]:
# Display the (attribute column, error column) pairs of the error dictionary.
new_err_dict.items()

__Número de árvores e função de árvore de decisão a ser usada:__ \
    random: número de catálogos aleatórios criados a partir do original\
    trees: número de conjuntos bootstrap criados a partir de cada catálogo\
    _número final de árvores: random x trees_

In [None]:
# Forest size and tree settings; tpz_dict forwards them to TPZliteInformer.
random = 12  # number of random catalogues drawn from the original (sent as `nrandom`)
trees = 9  # number of bootstrap sets built from each catalogue (sent as `ntrees`)
# `leaf` is read by tpz_dict (as `minleaf`) and by the output file name of the
# estimate step, but it was never assigned, so those cells raised NameError.
# NOTE(review): 5 is assumed to match the rail_tpz default for `minleaf` —
# confirm the value that was intended for this run.
leaf = 5
strategy = 'sklearn'  # decision-tree implementation (sent as `tree_strategy`)

__Dicionário com os parâmetros configurados:__

In [None]:
# Configuration handed to TPZliteInformer.make_stage.
# The redshift range is taken from the training sample itself.
# NOTE: `random`, `trees`, `leaf` and `strategy` must already be defined
# when this cell runs.
train_redshift = training_data.data['photometry']['redshift']
tpz_dict = {
    "zmin": min(train_redshift),
    "zmax": max(train_redshift),
    "nzbins": 301,
    "mag_limits": new_mag_limits,
    "bands": train_atts,
    "hdf5_groupname": 'photometry',
    "use_atts": train_atts,
    "err_dict": new_err_dict,
    "nrandom": random,
    "ntrees": trees,
    "minleaf": leaf,
    "tree_strategy": strategy,
}

#### 3.2 Inform method

__Definindo o caminho e o nome do arquivo .pkl que será gerado:__

In [None]:
# Path of the .pkl model file; it is passed as `model` when the inform stage
# is created.
estimator_name =  '/lustre/t0/scratch/users/andreia.dourado/TCC/dp02/object/runs/5sigma/estimator_dp02_object_5sigma.pkl'
print(estimator_name)

__Rodando o TPZ:__

In [None]:
# Create the inform (training) stage: `model` is the .pkl path defined in
# estimator_name, and the remaining options come from tpz_dict.
pz_train = TPZliteInformer.make_stage(name='inform_TPZ', model=estimator_name, **tpz_dict)

In [None]:
%%time
# Run the inform (training) stage on the training table; the %%time cell
# magic reports how long the cell took.
pz_train.inform(training_data)

#### 3.3 Estimate stage

##### __Para utilizar um arquivo .pkl gerado previamente:__

In [None]:
import pickle

In [None]:
# Load a previously trained TPZ model from disk.
# Only unpickle files you created yourself: pickle can execute arbitrary code
# while loading. The context manager closes the file as soon as the model has
# been read (the bare open() call left the handle open).
with open('/home/andreia.dourado/TCC/dp01/10sigma/estimator_tpz_10sigma.pkl', 'rb') as model_file:
    model = pickle.load(model_file)

##### __Para utilizar o arquivo .pkl gerado neste notebook:__

In [None]:
# Take the model handle from the inform stage created in this notebook.
# This rebinds `model`, so run only one of the two `model = ...` cells.
model = pz_train.get_handle('model')

__Dicionário com os parâmetros de configuração:__

In [None]:
# Options for the estimate stage: the same HDF5 group, error-column mapping
# and magnitude limits that were prepared for training.
test_dict = {
    "hdf5_groupname": 'photometry',
    "test_err_dict": new_err_dict,
    "mag_limits": new_mag_limits,
}
test_dict

__Caminho para salvar o arquivo com o output do estimate:__

In [None]:
# Path of the HDF5 file that will receive the estimate output. The file name
# embeds `leaf`, which must already be defined when this cell runs.
# NOTE(review): this path points to dp02/truth, while the training and test
# files are read from dp02/object — confirm this is the intended location.
output_name = f'/lustre/t0/scratch/users/andreia.dourado/TCC/dp02/truth/5sigma/runs/output_tpz_dp02_truth_lsst_error_model_5sigma_{leaf}leaf.hdf5'
print(output_name)

__Rodando o estimate:__

In [None]:
# Create the estimate stage with the chosen model, the output file path and
# the options collected in test_dict.
test_runner = TPZliteEstimator.make_stage(name="test_tpz", output=output_name,
                                          model=model, **test_dict)

In [None]:
%%time
# Run the estimate stage on the test table and keep what it returns in
# `results`; the %%time cell magic reports how long the cell took.
results = test_runner.estimate(test_data)