In [4]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import torch
import os

from torch import nn, optim
from torch.optim import lr_scheduler
import time


In [5]:
def sigma68(data): return 0.5*(pd.Series(data).quantile(q = 0.84) - pd.Series(data).quantile(q = 0.16))

In [6]:
def exposure_SN(cat_in, filters,  scale = True):

    sn_lims =  {'U': 25.25, 'G': 24.65, 'R': 24.15, 'I': 24.35, 'ZN': 23.95,  'H':25,'J':25, 'Y':25}
    #sn_lims =  {'U': 25.5, 'G': 24.4, 'R': 24.1, 'I': 24.1, 'ZN': 23.7,  'H':23.2,'J':23.2, 'Y':23.2}

    #sn_lims =  {'U':35, 'G':35, 'R': 40, 'I':40, 'ZN':40, 'H':35,'J':35, 'Y':35}
    #sn_lims =  {'U':27, 'G':27, 'R': 27, 'I':27, 'ZN':27, 'H':27,'J':27, 'Y':27}
    #sn_lims =  {'U':28, 'G':26, 'R': 25, 'I':25, 'ZN':26, 'H':25,'J':25, 'Y':25}

    
    scale_filters = [x for x in filters if scale == True]
    lims = [sn_lims[x] for x in filters]

    sn_val0 = 5
    R = 1

    sn_val = [(R*sn_val0 if x in scale_filters else sn_val0) for x in filters]

    D = sn_val*10**(np.array(lims) / 5.)

    mag = np.array(cat_in[[x for x in filters]])
    SN_arr =  D*10**(-0.2*mag)


    sn_min = 0
    sn_max = lims


    SN_arr = np.clip(SN_arr, sn_min, sn_max)
    SN = pd.DataFrame(SN_arr, columns=filters, index= cat_in.index)

    return SN



In [147]:
catalog = pd.read_csv('/data/astro/scratch/lcabayol/Euclid/PAUS_mock_Euclidbands.csv', sep = ',', header = 0, comment = '#')
#catalog = pd.read_csv('/cephfs/pic.es/astro/scratch/lcabayol/Euclid/Euclid_mock.csv', sep = ',', header = 0, comment = '#')

catalog = catalog.dropna()
catalog = catalog.loc[:50000,:].reset_index()

In [148]:
Ngals = 35000
catalog = catalog.sample(Ngals)
catalog['imag'] = 26 - 2.5*np.log10(catalog.flux_nl_el_t_blanco_decam_i.values)
catalog = catalog[catalog.imag < 23]

In [149]:
catalog['ref_id'] = np.arange(len(catalog))

nb_names_old = ['flux_nl_el_t_pau_nb%s'%x for x in 455+10*np.arange(40)]
nb_names_new = ['NB%s'%x for x in 455+10*np.arange(40)]
nb_name_dict = dict(zip(nb_names_old, nb_names_new))

catalog_nb_nl = catalog.set_index('ref_id')[nb_names_old].rename(columns = nb_name_dict)

bb_names_old = ['flux_nl_el_t_cfht_u','flux_nl_el_t_blanco_decam_g','flux_nl_el_t_blanco_decam_r','flux_nl_el_t_blanco_decam_i','flux_nl_el_t_blanco_decam_z','flux_nl_el_t_euclid_nisp_h','flux_nl_el_t_euclid_nisp_j','flux_nl_el_t_euclid_nisp_y']
bb_names_new = ['U','G','R','I','ZN','H','J','Y']
bb_name_dict = dict(zip(bb_names_old, bb_names_new))

catalog_bb_nl = catalog.set_index('ref_id')[bb_names_old].rename(columns = bb_name_dict)

In [150]:
# load fits from Martin Eriksen to estimate SNR from the flux

snr_fit = pd.read_csv('/nfs/pic.es/user/l/lcabayol/Euclid/snr_fit.csv', sep = ',', header = 0)
factors = snr_fit[snr_fit.key == 'med'].reset_index()

aas = factors.a.values
bs = factors.b.values

f = 0.7
SNR_NB = np.exp(aas*np.log(f*np.abs(catalog_nb_nl.values)) + bs)
err = (np.abs(catalog_nb_nl.values) / SNR_NB)  * np.random.normal(0,1, size = (catalog_nb_nl.shape))

catalog_nb = pd.DataFrame(catalog_nb_nl + err, columns = nb_names_new, index = catalog_nb_nl.index)
catalog_nb_err = pd.DataFrame(np.abs(err), columns = nb_names_new, index = catalog_nb_nl.index)

In [151]:
catalog_bb_nl_mag = 26 - 2.5*np.log10(catalog_bb_nl)
filters = bb_names_new.copy()

SNR_flagship_BB = exposure_SN(catalog_bb_nl_mag, filters,  scale = True)

err = np.abs(catalog_bb_nl / SNR_flagship_BB)
err_rand = err * np.random.normal(0,1, size = (err.shape))

catalog_bb = catalog_bb_nl.values + err_rand
catalog_bb_err = np.abs(err_rand)

catalog_bb = pd.DataFrame(catalog_bb, columns = bb_names_new, index = catalog_bb_nl.index)
catalog_bb_err = pd.DataFrame(np.abs(err), columns = bb_names_new, index = catalog_bb_nl.index)


In [152]:
catalog_bb = 26 - 2.5*np.log10(catalog_bb)

In [153]:
catalog_bb['target_zs'] = catalog.observed_redshift_gal.values

In [154]:
m = np.arange(18,24,1)
s68 = [0.0025,0.003,0.004,0.0045,0.0055,0.007]
fSNR  = np.polyfit(m,s68,2)
imag = catalog_bb.I.values

dispersion_z = fSNR[0] * imag**2 + fSNR[1]*imag  + fSNR[2]
photoz = catalog_bb.target_zs.values+  dispersion_z * np.random.normal(0,1,size = catalog_bb.shape[0])
catalog_bb['target_zb'] = photoz

In [155]:
#catalog_bb['target_train'] = catalog_bb.target_zs
mask_train = np.random.choice([0,1], p = [0.7,0.3], size = len(catalog_bb))
target_train = catalog_bb.target_zs * mask_train
target_train = np.where(target_train== 0,catalog_bb.target_zb,catalog_bb.target_zs)
catalog_bb['true_z'] = catalog_bb.target_zs
catalog_bb['target_zs'] = catalog_bb.target_zs * mask_train
catalog_bb['target_zb'] = target_train


In [156]:
catalog_bb = catalog_bb.dropna()

# TRAIN NETWORKS

In [157]:
BB_list = ['U','G','R','I','ZN','H','J','Y']
NB_list = catalog_nb.columns.tolist()

In [158]:
import MTLphotozs

## BB

In [159]:
catalog_bb_sub = catalog_bb[catalog_bb.target_zs != 0]#.reset_index()
catalog_nb_sub = catalog_nb[catalog_nb.index.isin(catalog_bb_sub.index)]
catalog_nb_sub = 26-2.5*np.log10(catalog_nb_sub)

In [160]:
BB = MTLphotozs.mtl_photoz(zs = True, flagship=True)
catalog_training= catalog_bb_sub.copy()
catalog_nb_train = catalog_nb_sub[catalog_nb_sub.index.isin(catalog_training.index)]

training_loader = BB.create_loader(catalog_training,catalog_nb_train)
BBnet = BB.train_mtl(training_loader, epochs = 65)

## BB + NB

In [161]:
catalog_bb_sub = catalog_bb[catalog_bb.target_zb != 0].reset_index()
catalog_nb_sub = catalog_nb[catalog_nb.index.isin(catalog_bb_sub.index)]
catalog_nb_sub = 26-2.5*np.log10(catalog_nb_sub)

In [162]:
BBNB = MTLphotozs.mtl_photoz(zs = False, zs_NB = True, flagship = True)
catalog_training= catalog_bb_sub.copy()
catalog_nb_train = catalog_nb_sub[catalog_nb_sub.index.isin(catalog_training.index)]

training_loader = BBNB.create_loader(catalog_training,catalog_nb_train)
BBNBnet = BBNB.train_mtl(training_loader, epochs = 65)

## BB + z

In [163]:
catalog_bb_sub = catalog_bb[catalog_bb.target_zb != 0].reset_index()
catalog_nb_sub = catalog_nb[catalog_nb.index.isin(catalog_bb_sub.index)]
catalog_nb_sub = 26-2.5*np.log10(catalog_nb_sub)

In [164]:
BBz = MTLphotozs.mtl_photoz(zs = False, zs_zb = True, flagship = True)
catalog_training= catalog_bb_sub.copy()
catalog_nb_train = catalog_nb_sub[catalog_nb_sub.index.isin(catalog_training.index)]

training_loader = BBz.create_loader(catalog_training,catalog_nb_train)
BBznet = BBz.train_mtl(training_loader, epochs = 65)

## BB + NB + z

In [165]:
catalog_bb_sub = catalog_bb[catalog_bb.target_zb != 0].reset_index()
catalog_nb_sub = catalog_nb[catalog_nb.index.isin(catalog_bb_sub.index)]
catalog_nb_sub = 26-2.5*np.log10(catalog_nb_sub)

In [166]:
BBNBz = MTLphotozs.mtl_photoz(zs = False, zs_NB_zb = True, flagship = True)
catalog_training= catalog_bb_sub.copy()
catalog_nb_train = catalog_nb_sub[catalog_nb_sub.index.isin(catalog_training.index)]

training_loader = BBNBz.create_loader(catalog_training,catalog_nb_train)
BBNBznet = BBNBz.train_mtl(training_loader, epochs = 65)

## TEST TO i<25

In [167]:
catalog_test = pd.read_csv('/data/astro/scratch/lcabayol/Euclid/Euclid_mock_v2.csv', sep = ',', header = 0, comment = '#')
catalog_test = catalog_test.dropna()
catalog_test = catalog_test[catalog_test.observed_redshift_gal < 1.5]

catalog_test['mag'] =  -2.5 * np.log10(catalog_test.blanco_decam_i) - 48.6
catalog_test = catalog_test[catalog_test.mag < 25]
catalog_test = catalog_test[catalog_test.mag > 18]
catalog_test = catalog_test.sample(50000)
catalog_test = catalog_test.reset_index()

In [168]:
BB_name = ['cfis_u','blanco_decam_g','blanco_decam_r','blanco_decam_i','blanco_decam_z','euclid_nisp_h','euclid_nisp_j','euclid_nisp_y']
catalog_bb_test = catalog_test[BB_name]
bb_names_new = ['U','G','R','I','ZN','H','J','Y']
bb_name_dict = dict(zip(BB_name, bb_names_new))

catalog_bb_test_nl = catalog_bb_test.rename(columns = bb_name_dict)
catalog_bb_test_nl_mag = -48.6 - 2.5*np.log10(catalog_bb_test_nl)
filters = bb_names_new.copy()

SNR_flagship_BB = exposure_SN(catalog_bb_test_nl_mag, filters,  scale = True)
err = np.abs(catalog_bb_test_nl / SNR_flagship_BB)
err_rand = err * np.random.normal(0,1, size = (err.shape))

catalog_bb_test = catalog_bb_test_nl.values + err_rand.values

catalog_bb_test = pd.DataFrame(catalog_bb_test, columns = bb_names_new)

samps_BB_spec_test_store = catalog_bb_test.copy()

samps_BB_spec_test = catalog_bb_test[bb_names_new].values
samps_BB_spec_test = -2.5 * np.log10(samps_BB_spec_test) - 48.6

zspec_test = catalog_test.observed_redshift_gal.values
zb_bin_spec_test = 1000* zspec_test

colors_spec_test = samps_BB_spec_test[:,:-1] - samps_BB_spec_test[:,1:] 

colors_spec_test, zspec_test, zb_bin_spec_test = torch.Tensor(colors_spec_test), torch.Tensor(zspec_test), torch.LongTensor(zb_bin_spec_test)
mag_test = -2.5 * np.log10(catalog_bb_test.I) - 48.6


  samps_BB_spec_test = -2.5 * np.log10(samps_BB_spec_test) - 48.6


catalog_test['ref_id'] = np.arange(len(catalog_test))

bb_names_old = ['flux_nl_el_t_cfht_u','flux_nl_el_t_blanco_decam_g','flux_nl_el_t_blanco_decam_r','flux_nl_el_t_blanco_decam_i','flux_nl_el_t_blanco_decam_z','flux_nl_el_t_euclid_nisp_h','flux_nl_el_t_euclid_nisp_j','flux_nl_el_t_euclid_nisp_y']
bb_names_new = ['U','G','R','I','ZN','H','J','Y']
bb_name_dict = dict(zip(bb_names_old, bb_names_new))

catalog_bb_nl = catalog_test.set_index('ref_id')[bb_names_old].rename(columns = bb_name_dict)

catalog_bb_nl_mag = 26 - 2.5*np.log10(catalog_bb_nl)
filters = bb_names_new.copy()

SNR_flagship_BB = exposure_SN(catalog_bb_nl_mag, filters,  scale = True)

err = np.abs(catalog_bb_nl / SNR_flagship_BB)
err_rand = err * np.random.normal(0,1, size = (err.shape))

fluxes_test_bb = catalog_bb_nl.values + err_rand
mag_test_bb = 26 - 2.5*np.log10(fluxes_test_bb)


colors_test_bb = mag_test_bb.values[:,:-1] - mag_test_bb.values[:,1:]
colors_test_bb = torch.Tensor(colors_test_bb)

zspec_test = catalog_test.observed_redshift_gal
imag = mag_test_bb.I.values

In [169]:
BBnet = BBnet.eval()
_, logalphas, z,logzerr = BBnet(colors_spec_test.cuda())
alphas = torch.exp(logalphas)
zb = (alphas * z).sum(1)
zb,logzerr  = zb.detach().cpu().numpy(), logzerr.detach().cpu().numpy()

df_bb = pd.DataFrame(np.c_[zb,zspec_test,mag_test], columns = ['zb_bb','zb_true','imag'])
df_bb['rerr_bb'] = (df_bb.zb_bb - df_bb.zb_true) / (1 + df_bb.zb_true)
print('Bias',np.nanmedian(df_bb.rerr_bb), 'scatter', sigma68(df_bb.rerr_bb))


Bias -0.025712944135413973 scatter 0.08207858781124264


In [170]:
BBNBnet = BBNBnet.eval()
_, logalphas, z,logzerr = BBNBnet(colors_spec_test.cuda()) 
alphas = torch.exp(logalphas)
zb = (alphas * z).sum(1)
zb,logzerr  = zb.detach().cpu().numpy(), logzerr.detach().cpu().numpy()

df_bbnb = pd.DataFrame(np.c_[zb,zspec_test,mag_test], columns = ['zb_bbnb','zb_true','imag'])
df_bbnb['rerr_bbnb'] = (df_bbnb.zb_bbnb - df_bbnb.zb_true) / (1 + df_bbnb.zb_true)
print('Bias',np.nanmedian(df_bbnb.rerr_bbnb), 'scatter', sigma68(df_bbnb.rerr_bbnb))


Bias -0.021507839494646686 scatter 0.07961092558336426


In [171]:
BBznet = BBznet.eval()
_, logalphas, z,logzerr = BBznet(colors_spec_test.cuda()) 
alphas = torch.exp(logalphas)
zb = (alphas * z).sum(1)
zb,logzerr  = zb.detach().cpu().numpy(), logzerr.detach().cpu().numpy()

df_bbz = pd.DataFrame(np.c_[zb,zspec_test,mag_test], columns = ['zb_bbz','zb_true','imag'])
df_bbz['rerr_bbz'] = (df_bbz.zb_bbz - df_bbz.zb_true) / (1 + df_bbz.zb_true)
print('Bias',np.nanmedian(df_bbz.rerr_bbz), 'scatter', sigma68(df_bbz.rerr_bbz))


Bias -0.02222127321361156 scatter 0.08043980057354241


In [172]:
BBNBznet = BBNBznet.eval()
_, logalphas, z,logzerr = BBNBznet(colors_spec_test.cuda()) 
alphas = torch.exp(logalphas)
zb = (alphas * z).sum(1)
zb,logzerr  = zb.detach().cpu().numpy(), logzerr.detach().cpu().numpy()

df_bbnbz = pd.DataFrame(np.c_[zb,zspec_test,mag_test], columns = ['zb_bbnbz','zb_true','imag'])
df_bbnbz['rerr_bbnbz'] = (df_bbnbz.zb_bbnbz - df_bbnbz.zb_true) / (1 + df_bbnbz.zb_true)
print('Bias',np.nanmedian(df_bbnbz.rerr_bbnbz), 'scatter', sigma68(df_bbnbz.rerr_bbnbz))


Bias -0.015809053174913 scatter 0.079486788843396


In [173]:
df_bb.to_csv('/data/astro/scratch/lcabayol/Euclid/referee_report/catalogues/df_bb_flagship2.csv', header = True, sep =',')
df_bbnb.to_csv('/data/astro/scratch/lcabayol/Euclid/referee_report/catalogues/df_bbnb_flagship2.csv', header = True, sep =',')
df_bbz.to_csv('/data/astro/scratch/lcabayol/Euclid/referee_report/catalogues/df_bbz_flagship2.csv', header = True, sep =',')
df_bbnbz.to_csv('/data/astro/scratch/lcabayol/Euclid/referee_report/catalogues/df_bbnbz_flagship2.csv', header = True, sep =',')