PLAsTiCC test-set metadata, taken from the public Zenodo release:
https://zenodo.org/record/2539456

class_ids_names = { 90: 'Ia',
                  42: 'II', 
                  62: 'Ibc', 
                  67: 'Ia-91bg', 
                  15: 'TDE'} 

object_id : unique object identifier (int32)

ra : right ascension, degrees (float32)

decl : declination, degrees (float32)

ddf_bool : boolean flag: 1 for DDF (Deep Drilling Fields), 0 for WFD (Wide-Fast-Deep survey)

hostgal_specz : accurate spectroscopic host-galaxy redshift, available only for a small subset of objects (float32)

hostgal_photoz : photometric host-redshift (float32)

hostgal_photoz_err : uncertainty on photometric host-redshift (float32)


distmod : distance modulus computed with hostgal_photoz (float32)

mwebv : Galactic E(B-V) extinction (float32)

target : integer model class during challenge (0 for test set)

true_target : integer model class for all objects (post-challenge)

true_submodel : sub-model type for independently-developed models

true_z : true redshift, CMB frame (float32)

In [10]:
import pandas as pd
from estimate.constants import *
import pickle

# Read the PLAsTiCC test-set metadata table from DATA_DIR.
metadata_csv = DATA_DIR + "plasticc/plasticc_test_metadata.csv"
p_df = pd.read_csv(metadata_csv)

# Keep only the rows whose true_target appears among the values of CLASS_ID_NAMES.
# NOTE(review): the notes at the top of this notebook show an id -> name mapping;
# if CLASS_ID_NAMES has that orientation, .values() yields names rather than
# integer ids -- confirm against estimate.constants.
keeps_ids = list(CLASS_ID_NAMES.values())
is_kept_class = p_df['true_target'].isin(keeps_ids)
keep_p_df = p_df[is_kept_class]

# Pickle the filtered frame into DATA_DIR.
lsst_pickle = DATA_DIR + 'LSST_data.pickle'
with open(lsst_pickle, 'wb') as out_file:
    pickle.dump(keep_p_df, out_file, protocol=pickle.HIGHEST_PROTOCOL)


In [4]:
# Sanity check: every metadata row must carry a distinct object_id.
unique_ids = p_df['object_id'].unique()
# Assert the invariant instead of evaluating the comparison and discarding it,
# so the message below is only printed when the check actually holds.
assert p_df.shape[0] == len(unique_ids), (
    "Duplicate object_id values found: "
    + str(p_df.shape[0] - len(unique_ids)) + " extra rows."
)
print("Each row has unique object ID.")
print("Total " + str(p_df.shape[0]))

Each row has unique object ID.
Total 3492890


In [1]:
from estimate.get_data import *
# lsst_df = get_lsst_data()
# get_lsst_class_Zs(class_name="Ia",  lsst_df=lsst_df)

In [2]:
from models.multi_model.multi_model import MultiModel

# Feature columns handed to the model: ten magnitude columns plus the Z_FEAT column.
cols = [
    "g_mag", "r_mag", "i_mag", "z_mag", "y_mag",
    "W1_mag", "W2_mag", "H_mag", "K_mag", "J_mag",
    Z_FEAT,
]
codes = ["A1", "F1", "B1", "G1"]

# Collect the constructor arguments in one place, then build the model.
model_kwargs = dict(
    cols=cols,
    class_labels=["Unspecified Ia", "II"],
    transform_features=False,
    case_code=codes,
    min_class_size=40,
    data_file=CUR_DATA_PATH,
)
model = MultiModel(**model_kwargs)

Saving Multiclass Classifier output to directory /Users/marina/Documents/PhD/research/astro_research/code/environments/dist_env/lib/python3.8/site-packages/thex_data/../output/Multiclass_Classifier13


Constructing Class Hierarchy Tree...
Using data: /Users/marina/Documents/PhD/research/astro_research/code/dist_code/estimate/../../../data/catalogs/v8/THEx-v8.0-release.mags-xcalib.min-xcal.fits

Classes:
['Unspecified Ia', 'II']

Features:
['g_mag', 'r_mag', 'i_mag', 'z_mag', 'y_mag', 'W1_mag', 'W2_mag', 'H_mag', 'K_mag', 'J_mag', 'event_z']


		Class Counts
Unspecified Ia : 7104
II : 2312


In [10]:
# Place model.X and model.y side by side (column-wise) in a single frame.
model_parts = [model.X, model.y]
thex_dataset = pd.concat(model_parts, axis="columns")

In [16]:
# Pull the rows for one class out of the combined THEx frame via the project helper.
ia_class_name = "Unspecified Ia"
tr = get_thex_class_data(ia_class_name, thex_dataset)

In [17]:
tr

Unnamed: 0,g_mag,r_mag,i_mag,z_mag,y_mag,W1_mag,W2_mag,H_mag,K_mag,J_mag,event_z,transient_type
0,21.898060,20.725023,20.228594,19.929192,19.846043,19.609688,19.954700,21.154827,20.916563,20.617863,0.29300,"I, Ia, _ROOT, _SN, _W_UVOPT, Unspecified Ia"
1,16.848820,16.359283,15.967090,15.833683,15.759572,16.219620,16.743914,15.804460,15.858040,16.032152,0.04100,"I, Ia, _ROOT, _SN, _W_UVOPT, Unspecified Ia"
2,18.268185,17.890799,17.710703,17.687584,17.622358,18.328897,18.897024,19.133375,19.538870,18.411169,0.06870,"I, Ia, _ROOT, _SN, _W_UVOPT, Unspecified Ia"
3,18.071247,17.540562,17.211061,17.149532,17.015093,17.039862,17.501480,15.942355,15.667990,16.142517,0.07109,"I, Ia, _ROOT, _SN, _W_UVOPT, Unspecified Ia"
4,17.157240,16.644789,16.343119,16.295837,16.156345,16.376265,16.865744,15.488778,15.544565,15.770565,0.03100,"I, Ia, _ROOT, _SN, _W_UVOPT, Unspecified Ia"
...,...,...,...,...,...,...,...,...,...,...,...,...
9407,17.829165,16.990057,16.470192,16.233416,15.988482,16.118584,16.658617,15.361423,15.725965,15.722445,0.08930,"I, Ia, _ROOT, _SN, _W_UVOPT, Unspecified Ia"
9408,15.396293,14.789064,14.195647,14.026376,13.818435,14.167336,14.654648,13.309790,13.398217,13.600626,0.05600,"I, Ia, _ROOT, _SN, _W_UVOPT, Unspecified Ia"
9409,22.766733,21.022842,20.602638,20.415592,20.275206,19.316761,19.483290,21.757317,21.985352,20.968248,0.44700,"I, Ia, _ROOT, _SN, _UNCERTAIN, _W_UVOPT, Unspe..."
9410,13.933667,13.489554,13.198602,13.109960,13.068508,13.206953,13.781283,12.470519,12.807032,12.685899,0.01320,"I, Ia, _ROOT, _SN, _W_UVOPT, Unspecified Ia"


In [18]:
# Fetch the LSST frame through the project helper, then extract the values for
# class "Ia" (presumably redshifts, going by the variable name -- confirm in
# estimate.get_data).
lsst_df = get_lsst_data()
lsst_z_vals = get_lsst_class_Zs(class_name="Ia", lsst_df=lsst_df)

In [19]:
lsst_z_vals

array([0.624, 0.454, 0.792, ..., 0.455, 0.503, 0.712])

In [20]:
# 50 evenly spaced edges over [0, 1] give 49 bins.
n_edges = 50
Z_bins = np.linspace(0, 1, n_edges)

# Count the LSST values per bin, then express each count as a fraction of ALL
# values (values falling outside [0, 1] are in the denominator but in no bin).
hist, bins = np.histogram(lsst_z_vals, bins=Z_bins)
total_lsst = len(lsst_z_vals)
z_dist = hist / total_lsst

In [21]:
z_dist[4]

0.0033111804756026366

In [23]:
# Total number of rows to draw across all bins.
num_samples = 140

In [31]:
# Select one bin and scale its LSST frequency by num_samples to get the number
# of rows to draw from that bin.
index = 5
freq = z_dist[index]
# The recorded output shows round() yielding a float ("1.0") here; cast so the
# count is a true integer that can be passed straight to DataFrame.sample(n=...).
samples = int(round(num_samples * freq))
print("Sampling num samples: " + str(samples))
min_feature = Z_bins[index]
max_feature = Z_bins[index + 1]
# Filter by redshift
# NOTE(review): thex_class_data is not assigned in any cell visible in this
# notebook; it must already exist in the kernel for this cell to run.
z_vals = thex_class_data[Z_FEAT]
# np.histogram treats every bin as half-open [lo, hi) except the last, which is
# closed. Mirror that here so a value sitting exactly on an interior edge is
# selected for the same bin it was counted in.
is_last_bin = index == len(Z_bins) - 2
if is_last_bin:
    in_bin = (z_vals >= min_feature) & (z_vals <= max_feature)
else:
    in_bin = (z_vals >= min_feature) & (z_vals < max_feature)
f_df = thex_class_data[in_bin]

Sampling num samples: 1.0


In [32]:
f_df

Unnamed: 0,g_mag,r_mag,i_mag,z_mag,y_mag,W1_mag,W2_mag,H_mag,K_mag,J_mag,event_z,transient_type
1024,19.630512,19.035997,18.524569,18.38398,18.261177,17.740244,18.122986,17.31719,17.336788,17.83218,0.104,"CC, II, IIn, _ROOT, _SN, _SN_INTERACT, _W_UVOP..."
1064,19.027924,18.685076,18.521338,18.644167,18.633337,18.891264,19.159967,18.394716,18.213194,18.685268,0.12,"CC, II, _ROOT, _SN, _W_UVOPT, Unspecified II"
1839,17.484735,17.093166,16.745504,16.694603,16.374605,16.497562,16.926065,15.375065,14.827612,15.752765,0.103,"CC, II, _ROOT, _SN, _W_UVOPT, Unspecified II"
1969,17.76158,17.225565,16.890726,16.738485,16.699993,16.832504,17.251249,15.958875,16.059525,16.35441,0.112,"CC, II, _ROOT, _SN, _W_UVOPT, Unspecified II"
3150,19.083794,18.428846,18.139774,17.968384,18.54471,18.114717,18.585117,19.410055,19.530781,18.626232,0.119688,"CC, II, _ROOT, _SN, _W_UVOPT, Unspecified II"
3182,18.188961,17.783623,17.441666,17.417118,17.400822,17.390676,17.861195,18.775904,19.086218,18.108646,0.118789,"CC, II, _ROOT, _SN, _UNCERTAIN, _W_UVOPT, Unsp..."
3221,18.991514,18.594561,18.293577,18.352146,18.426264,18.429153,19.140732,18.202623,17.645933,18.165564,0.114669,"CC, II, _ROOT, _SN, _W_UVOPT, Unspecified II"
3298,19.740904,18.967903,18.480038,18.408384,18.706619,18.390936,18.887861,19.655127,20.172285,17.949169,0.106605,"CC, II, _ROOT, _SN, _W_UVOPT, Unspecified II"
3352,17.785816,17.311609,16.983335,16.818943,16.616436,17.023848,17.493538,18.248089,16.635897,17.634954,0.114897,"CC, II, _ROOT, _SN, _W_UVOPT, Unspecified II"
3356,17.711237,17.038475,16.713642,16.495928,16.299166,16.045258,16.446129,17.479282,17.87114,17.023802,0.103514,"CC, II, _ROOT, _SN, _W_UVOPT, Unspecified II"


In [35]:
# Draw the number of rows computed for this bin instead of a hard-coded literal;
# int() guards against `samples` being a float.
f_df.sample(n=int(samples))

Unnamed: 0,g_mag,r_mag,i_mag,z_mag,y_mag,W1_mag,W2_mag,H_mag,K_mag,J_mag,event_z,transient_type
6221,18.74728,18.385277,18.127626,18.082731,18.234802,18.316849,18.764482,18.091455,17.843937,18.046888,0.104,"CC, II, II P, _ROOT, _SN, _W_UVOPT, Unspecifie..."


In [33]:
# Recompute the per-bin count for a different total (112 here, not num_samples).
trial_total = 112
samples = round(trial_total * freq)
samples

1.0

In [30]:
freq

0.0033111804756026366