# TPz lite

_Authors: Andreia Dourado, Bruno Moraes_

_Adapted from Sam Schmidt's example notebook: <https://github.com/LSSTDESC/rail_tpz>._


In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import rail
import qp
from rail.core.data import TableHandle, PqHandle, ModelHandle, QPHandle, DataHandle, Hdf5Handle
from rail.core.data import TableHandle
from rail.core.stage import RailStage
import os
import tables_io

from rail.estimation.algos.tpz_lite import TPZliteInformer
from rail.estimation.algos.tpz_lite import TPZliteEstimator

In [None]:
# Handle to RailStage's class-level data store; used below to read the input files.
DS = RailStage.data_store
# Set allow_overwrite on the data store's class (presumably lets existing
# entries be replaced when cells are re-run -- confirm against the RAIL docs).
DS.__class__.allow_overwrite = True

### 1. Reading the data

Train file

In [None]:
# Population selector ('sem' is Portuguese for "without"); it is interpolated
# into the "{pop}_populacao" directory name of the file paths below.
pop = 'sem'
# Interpolated into the "{s}sigma" directory name of the same file paths.
s = '5'

In [None]:
# Path of the HDF5 training file for the selected sigma / population directory.
train_file = f'/home/andreia.dourado/TCC/rubin_roman_steps/{s}sigma/{pop}_populacao/train_file.hdf5'

In [None]:
# Read the training file through the data store as a TableHandle named "training_data".
training_data = DS.read_file("training_data", TableHandle, train_file)

In [None]:
# Print the loaded training table as a quick sanity check.
print(training_data.data)

In [None]:
# Length of the mag_g column of the training table (shown as the cell output).
len(training_data.data['mag_g'])

Test file

In [None]:
# Path of the HDF5 test file for the selected sigma / population directory.
testfile = f'/home/andreia.dourado/TCC/rubin_roman_steps/{s}sigma/{pop}_populacao/test_file.hdf5'

In [None]:
# Read the test file through the data store as a TableHandle named "test_data".
test_data = DS.read_file("test_data", TableHandle, testfile)

In [None]:
# Print the loaded test table as a quick sanity check.
print(test_data.data)

In [None]:
# Length of the mag_g column of the test table (shown as the cell output).
len(test_data.data['mag_g'])

### 2. Run TPz

#### 2.1 Setting the parameters

In [None]:
# Band suffixes of the magnitude columns used by TPZ.
bands = ["g", "i", "r", "u", "y", "z"]

train_atts = []      # magnitude column names used as training attributes
new_err_dict = {}    # attribute name -> name of its error column
new_mag_limits = {}  # attribute name -> largest value found in the training data
for band in bands:
    mag_col = f"mag_{band}"
    train_atts.append(mag_col)
    new_err_dict[mag_col] = f"magerr_{band}"
    new_mag_limits[mag_col] = max(training_data.data[mag_col])

# Redshift is also an attribute used in the training, but it has no associated
# error column, so its entry in the error dictionary is set to None.
new_err_dict["redshift"] = None

# Show the three structures, one per line.
print(new_err_dict, train_atts, new_mag_limits, sep="\n")

In [None]:
# Display the (attribute, error column) pairs as the cell output.
new_err_dict.items()

In [None]:
# Values passed to TPZ below as nrandom, ntrees and tree_strategy; they are
# also embedded in the model and output file names.
# NOTE(review): the name `random` would shadow the stdlib `random` module if
# that module were imported in this notebook.
random = 20
trees = 5
strategy = 'sklearn'

In [None]:
# zmin / zmax are taken from the redshift column of the training table.
train_redshift = training_data.data['redshift']

# Keyword options handed to the TPZ informer stage.
tpz_dict = {
    "zmin": min(train_redshift),
    "zmax": max(train_redshift),
    "nzbins": 301,
    "mag_limits": new_mag_limits,
    "bands": train_atts,
    "hdf5_groupname": None,
    "use_atts": train_atts,
    "err_dict": new_err_dict,
    "nrandom": random,
    "ntrees": trees,
    "tree_strategy": strategy,
}

#### 2.2 Inform method

In [None]:
# Path of the pickled model file; the name encodes nrandom, ntrees and the tree strategy.
estimator_name = f'/home/andreia.dourado/TCC/rubin_roman_steps/{s}sigma/{pop}_populacao/estimator_{random}random_{trees}trees_{strategy}.pkl'

In [None]:
# Build the informer stage with the TPZ options; estimator_name is passed as
# the `model` option (presumably the file the trained model is written to).
pz_train = TPZliteInformer.make_stage(name='inform_TPZ', model=estimator_name, **tpz_dict)

In [None]:
%%time
# Run the informer stage on the training data (timed by the cell magic above).
pz_train.inform(training_data)

#### 2.3 Estimate stage

##### 2.3.1 Using a previous .pkl file:

In [None]:
import pickle

In [None]:
# Load a previously saved model from disk instead of using the one produced by
# the inform stage. The file is opened in a `with` block so the handle is
# closed as soon as the pickle has been read.
# NOTE: unpickling can execute arbitrary code -- only load files you trust.
with open(f'/home/andreia.dourado/TCC/rubin_roman_steps/{s}sigma/{pop}_populacao/estimator_{random}random_{trees}trees_{strategy}.pkl', 'rb') as model_file:
    model = pickle.load(model_file)

##### 2.3.2 Using the .pkl file generated in this notebook:

In [None]:
# Take the 'model' handle from the informer stage.
# NOTE: this rebinds `model`, replacing the pickle-loaded object from 2.3.1
# if that cell was also run.
model = pz_train.get_handle('model')

Run:

In [None]:
# Keyword options for the estimator stage; it reuses the error-column mapping
# and magnitude limits built from the training data.
test_dict = {
    "hdf5_groupname": None,
    "test_err_dict": new_err_dict,
    "mag_limits": new_mag_limits,
}
test_dict

In [None]:
# Path of the HDF5 output file; the name encodes nrandom, ntrees and the tree strategy.
output_name = f'/home/andreia.dourado/TCC/rubin_roman_steps/{s}sigma/{pop}_populacao/output_{random}random_{trees}trees_{strategy}.hdf5'
# Display the resolved path as the cell output.
output_name

In [None]:
# Build the estimator stage from the selected model and the test options;
# output_name is passed as the `output` option.
test_runner = TPZliteEstimator.make_stage(name="test_tpz", output=output_name,
                                          model=model, **test_dict)

In [None]:
%%time
# Run the estimator stage on the test data and keep what it returns
# (timed by the cell magic above).
results = test_runner.estimate(test_data)