In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import torch
import os

from torch import nn, optim
from torch.optim import lr_scheduler
import time


In [2]:
def sigma68(data): return 0.5*(pd.Series(data).quantile(q = 0.84) - pd.Series(data).quantile(q = 0.16))

In [3]:
def exposure_SN(cat_in, filters,  scale = True):

    sn_lims =  {'U': 25.25, 'G': 24.65, 'R': 24.15, 'I': 24.35, 'ZN': 23.95,  'H':25,'J':25, 'Y':25}
    #sn_lims =  {'U': 25.5, 'G': 24.4, 'R': 24.1, 'I': 24.1, 'ZN': 23.7,  'H':23.2,'J':23.2, 'Y':23.2}

    #sn_lims =  {'U':35, 'G':35, 'R': 40, 'I':40, 'ZN':40, 'H':35,'J':35, 'Y':35}
    #sn_lims =  {'U':27, 'G':27, 'R': 27, 'I':27, 'ZN':27, 'H':27,'J':27, 'Y':27}
    #sn_lims =  {'U':28, 'G':26, 'R': 25, 'I':25, 'ZN':26, 'H':25,'J':25, 'Y':25}

    
    scale_filters = [x for x in filters if scale == True]
    lims = [sn_lims[x] for x in filters]

    sn_val0 = 5
    R = 1

    sn_val = [(R*sn_val0 if x in scale_filters else sn_val0) for x in filters]

    D = sn_val*10**(np.array(lims) / 5.)

    mag = np.array(cat_in[[x for x in filters]])
    SN_arr =  D*10**(-0.2*mag)


    sn_min = 0
    sn_max = lims


    SN_arr = np.clip(SN_arr, sn_min, sn_max)
    SN = pd.DataFrame(SN_arr, columns=filters, index= cat_in.index)

    return SN



In [4]:
catalog = pd.read_csv('/data/astro/scratch/lcabayol/Euclid/PAUS_mock_Euclidbands.csv', sep = ',', header = 0, comment = '#')
#catalog = pd.read_csv('/cephfs/pic.es/astro/scratch/lcabayol/Euclid/Euclid_mock.csv', sep = ',', header = 0, comment = '#')

catalog = catalog.dropna()

In [5]:
Ngals = 35000
catalog = catalog.sample(Ngals)
catalog['imag'] = 26 - 2.5*np.log10(catalog.flux_nl_el_t_blanco_decam_i.values)
catalog = catalog[catalog.imag < 23]

In [6]:
catalog['ref_id'] = np.arange(len(catalog))

In [7]:
nb_names_old = ['flux_nl_el_t_pau_nb%s'%x for x in 455+10*np.arange(40)]
nb_names_new = ['NB%s'%x for x in 455+10*np.arange(40)]
nb_name_dict = dict(zip(nb_names_old, nb_names_new))

catalog_nb_nl = catalog.set_index('ref_id')[nb_names_old].rename(columns = nb_name_dict)

In [8]:
bb_names_old = ['flux_nl_el_t_cfht_u','flux_nl_el_t_blanco_decam_g','flux_nl_el_t_blanco_decam_r','flux_nl_el_t_blanco_decam_i','flux_nl_el_t_blanco_decam_z','flux_nl_el_t_euclid_nisp_h','flux_nl_el_t_euclid_nisp_j','flux_nl_el_t_euclid_nisp_y']
bb_names_new = ['U','G','R','I','ZN','H','J','Y']
bb_name_dict = dict(zip(bb_names_old, bb_names_new))

catalog_bb_nl = catalog.set_index('ref_id')[bb_names_old].rename(columns = bb_name_dict)

In [9]:
# load fits from Martin Eriksen to estimate SNR from the flux

snr_fit = pd.read_csv('/nfs/pic.es/user/l/lcabayol/Euclid/snr_fit.csv', sep = ',', header = 0)
factors = snr_fit[snr_fit.key == 'med'].reset_index()

aas = factors.a.values
bs = factors.b.values

f = 0.7
SNR_NB = np.exp(aas*np.log(f*np.abs(catalog_nb_nl.values)) + bs)
err = (np.abs(catalog_nb_nl.values) / SNR_NB)  * np.random.normal(0,1, size = (catalog_nb_nl.shape))

catalog_nb = pd.DataFrame(catalog_nb_nl + err, columns = nb_names_new, index = catalog_nb_nl.index)
catalog_nb_err = pd.DataFrame(np.abs(err), columns = nb_names_new, index = catalog_nb_nl.index)

In [10]:
catalog_bb_nl_mag = 26 - 2.5*np.log10(catalog_bb_nl)
filters = bb_names_new.copy()

SNR_flagship_BB = exposure_SN(catalog_bb_nl_mag, filters,  scale = True)

err = np.abs(catalog_bb_nl / SNR_flagship_BB)
err_rand = err * np.random.normal(0,1, size = (err.shape))

catalog_bb = catalog_bb_nl.values + np.abs(err_rand)
catalog_bb_err = np.abs(err_rand)

catalog_bb = pd.DataFrame(catalog_bb, columns = bb_names_new, index = catalog_bb_nl.index)
catalog_bb_err = pd.DataFrame(np.abs(err), columns = bb_names_new, index = catalog_bb_nl.index)


In [11]:
catalog_bb['target_zs'] = catalog.observed_redshift_gal.values

In [12]:
m = np.arange(18,24,1)
s68 = [0.0025,0.003,0.004,0.0045,0.0055,0.007]
fSNR  = np.polyfit(m,s68,2)
imag = catalog_bb.I.values

dispersion_z = fSNR[0] * catalog_bb.I.values**2 + fSNR[1]*catalog_bb.I.values  + fSNR[2]
photoz = catalog_bb.target_zs.values+  dispersion_z * np.random.normal(0,1,size = catalog_bb.shape[0])
catalog_bb['target_zb'] = photoz

In [13]:
catalog_bb['target_train'] = catalog_bb.target_zs
mask_train = np.random.choice([0,1], p = [0.7,0.3], size = len(catalog_bb))
catalog_bb['target_train'] = catalog_bb.target_train * mask_train

# TRAIN NETWORKS

In [14]:
BB_list = ['U','G','R','I','ZN','H','J','Y']
NB_list = catalog_nb.columns.tolist()

In [15]:
import MTLphotozs

## BB

In [16]:
catalog_bb_sub = catalog_bb[catalog_bb.target_train != 0]#.reset_index()
catalog_nb_sub = catalog_nb[catalog_nb.index.isin(catalog_bb_sub.index)]
catalog_nb_sub = 26-2.5*np.log10(catalog_nb_sub)

In [22]:
catalog_nb_sub

Unnamed: 0_level_0,NB455,NB465,NB475,NB485,NB495,NB505,NB515,NB525,NB535,NB545,...,NB755,NB765,NB775,NB785,NB795,NB805,NB815,NB825,NB835,NB845
ref_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,,23.821380,23.910893,,25.229087,,22.884951,23.802457,,,...,21.866189,22.103579,21.947398,21.820551,21.634989,21.463247,21.663812,21.549687,21.608376,21.563543
4,24.205679,23.578037,23.868523,,23.719527,24.031548,23.710326,23.499196,23.461648,25.618238,...,22.390932,22.400258,22.011696,22.350180,22.056117,22.364125,22.204040,22.034060,22.256541,21.772407
7,24.379892,23.484776,23.038371,23.072309,24.197108,22.783638,24.155616,22.764440,22.223444,22.317165,...,21.722269,21.602030,21.733232,21.692501,21.869040,21.592550,21.874550,21.596562,21.882715,21.757098
8,21.559548,21.773858,21.415239,21.178268,21.521735,21.071240,21.026639,20.952516,20.982746,20.822754,...,19.127428,19.073816,19.027085,18.996505,18.944691,18.913608,18.926844,18.866132,18.817730,18.832927
10,24.425928,23.015307,,23.870268,24.147789,23.366811,24.255788,23.939156,22.912417,24.042905,...,21.520617,21.948184,21.715614,22.182187,22.287293,21.967796,21.933090,22.052844,22.047060,21.910640
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
34985,24.651175,,25.517309,23.649772,23.672188,23.953281,23.583184,,23.587278,23.049593,...,22.036436,22.135465,21.606549,21.825660,21.620868,21.660761,21.697299,21.919124,21.336554,21.596447
34988,24.525535,23.474857,23.532024,23.636786,26.823554,23.299180,23.809783,22.761895,25.010791,23.588090,...,22.317043,23.473502,23.053561,22.338217,22.210532,22.411265,22.375807,22.130971,22.197990,23.933445
34992,19.215309,19.134325,19.190012,19.141167,19.061960,18.996881,18.974384,18.965481,19.014921,18.936403,...,18.575406,18.521731,18.546033,18.543120,18.518246,18.509786,18.485214,18.441388,18.459024,18.450661
34994,23.665079,24.116321,23.605848,24.969468,24.637532,23.782587,,23.408405,23.068405,23.955036,...,23.226907,22.595000,22.966355,23.382683,22.542243,22.940414,23.211468,22.988030,22.572822,22.574193


In [21]:
df_all = pd.DataFrame()
cuts = np.arange(0,12001,2000)

for k in range(len(cuts)-1):
    BB = MTLphotozs.mtl_photoz(zs = True, flagship=True)
    catalog_test = catalog_bb_sub.loc[cuts[k]:cuts[k+1]]
    catalog_training= catalog_bb_sub[~catalog_bb_sub.index.isin(catalog_test.index)]
    catalog_nb_train = catalog_nb_sub[catalog_nb_sub.index.isin(catalog_training.index)]
    
    training_loader = BB.create_loader(catalog_bb,catalog_nb)
    BBnet = BB.train_mtl(training_loader, epochs = 65)
    
    
    samps_BB_spec_test =  catalog_test[BB_list].values
    samps_BB_colors_test = samps_BB_spec_test[:,:-1] - samps_BB_spec_test[:,1:]
    samps_BB_colors_test = torch.Tensor(samps_BB_colors_test)
    zb_test = catalog_test.target_zs.values
    refid_test = catalog_test.index
    _,logalphas, z,logzerr = BBnet(samps_BB_colors_test.cuda())
    
    alphas = torch.exp(logalphas)
    zb = (alphas * z).sum(1)
    zb,logzerr  = zb.detach().cpu().numpy(), logzerr.detach().cpu().numpy()

    df = pd.DataFrame(np.c_[refid_test,zb,zb_test], columns = ['ref_id','zb','zb_true'])
    df['rerr'] = (df.zb - df.zb_true) / (1 + df.zb_true)
    print('Bias',np.nanmedian(df.rerr), 'scatter', sigma68(df.rerr))
    
    df_all = pd.concat((df_all,df),0) 

Bias nan scatter nan


  df_all = pd.concat((df_all,df),0)


Bias nan scatter nan


KeyboardInterrupt: 

In [34]:
catalog_bb2 = ilbert[(ilbert.target_train == 0)&(ilbert.target_zb != 0)].reset_index()
samps_BB_spec_test = catalog_bb2[BB_list].values
zb_spec_test = catalog_bb2.target_zb.values
refid = catalog_bb2.paudm_id.values

samps_BB_colors_test = samps_BB_spec_test[:,:-1] - samps_BB_spec_test[:,1:]
samps_BB_colors_test = torch.Tensor(samps_BB_colors_test)

_,logalphas, z,logzerr = BBnet(samps_BB_colors_test.cuda())

alphas = torch.exp(logalphas)
zb = (alphas * z).sum(1)
zb,logzerr  = zb.detach().cpu().numpy(), logzerr.detach().cpu().numpy()

df = pd.DataFrame(np.c_[refid,zb,zb_spec_test], columns = ['ref_id','zb','zb_true'])
df['rerr'] = (df.zb - df.zb_true) / (1 + df.zb_true)

print('Bias',np.nanmedian(df.rerr), 'scatter', sigma68(df.rerr))

df_all = pd.concat((df_all,df),0) 

Bias -0.001982978882552288 scatter 0.025065920614642693


  df_all = pd.concat((df_all,df),0)


In [35]:
df_bb = df_all.copy()

## BB + NB

In [40]:
catalog_bb = ilbert[ilbert.target_zb != 0].reset_index()
catalog_nb = FAC_memba_flux[FAC_memba_flux.index.isin(catalog_bb.paudm_id)]
catalog_nb = 26-2.5*np.log10(catalog_nb)

In [46]:
df_all = pd.DataFrame()
cuts = np.linspace(0,len(catalog_bb),6)

for k in range(len(cuts)-1):
    BBNB = MTLphotozs.mtl_photoz(zs = False, zs_NB = True)
    catalog_test = catalog_bb.loc[cuts[k]:cuts[k+1]]
    catalog_training= catalog_bb[~catalog_bb.paudm_id.isin(catalog_test.paudm_id)]
    catalog_nb_train = catalog_nb[catalog_nb.index.isin(catalog_training.paudm_id)]
    
    training_loader = BBNB.create_loader(catalog_bb,catalog_nb)
    BBNBnet = BBNB.train_mtl(training_loader, epochs = 65)
    
    
    
    samps_BB_spec_test =  catalog_test[BB_list].values
    samps_BB_colors_test = samps_BB_spec_test[:,:-1] - samps_BB_spec_test[:,1:]
    samps_BB_colors_test = torch.Tensor(samps_BB_colors_test)
    zb_test = catalog_test.target_zb.values
    refid_test = catalog_test.paudm_id.values
    _,logalphas, z,logzerr = BBNBnet(samps_BB_colors_test.cuda())
    
    alphas = torch.exp(logalphas)
    zb = (alphas * z).sum(1)
    zb,logzerr  = zb.detach().cpu().numpy(), logzerr.detach().cpu().numpy()

    df = pd.DataFrame(np.c_[refid_test,zb,zb_test], columns = ['ref_id','zb','zb_true'])
    df['rerr'] = (df.zb - df.zb_true) / (1 + df.zb_true)
    print('Bias',np.nanmedian(df.rerr), 'scatter', sigma68(df.rerr))
    
    df_all = pd.concat((df_all,df),0) 

Bias -0.002595923838374069 scatter 0.021717317362539207


  df_all = pd.concat((df_all,df),0)


Bias -0.001162225287548539 scatter 0.020690977672107102
Bias -0.00039415785025018944 scatter 0.018476537272597408
Bias -0.00015393309481222573 scatter 0.021184981824151735
Bias 0.0013030845051024574 scatter 0.020980659372275536


In [48]:
df_bbnb = df_all.copy()

## BB + z

In [49]:
catalog_bb = ilbert[ilbert.target_zb != 0].reset_index()
catalog_nb = FAC_memba_flux[FAC_memba_flux.index.isin(catalog_bb.paudm_id)]
catalog_nb = 26-2.5*np.log10(catalog_nb)

In [51]:
df_all = pd.DataFrame()
cuts = np.linspace(0,len(catalog_bb),6)

for k in range(len(cuts)-1):
    BBz = MTLphotozs.mtl_photoz(zs = False, zs_zb = True)
    catalog_test = catalog_bb.loc[cuts[k]:cuts[k+1]]
    catalog_training= catalog_bb[~catalog_bb.paudm_id.isin(catalog_test.paudm_id)]
    catalog_nb_train = catalog_nb[catalog_nb.index.isin(catalog_training.paudm_id)]
    
    training_loader = BBz.create_loader(catalog_bb,catalog_nb)
    BBznet = BBz.train_mtl(training_loader, epochs = 65)
    
    
    samps_BB_spec_test =  catalog_test[BB_list].values
    samps_BB_colors_test = samps_BB_spec_test[:,:-1] - samps_BB_spec_test[:,1:]
    samps_BB_colors_test = torch.Tensor(samps_BB_colors_test)
    zb_test = catalog_test.target_zb.values
    refid_test = catalog_test.paudm_id.values
    _,logalphas, z,logzerr = BBznet(samps_BB_colors_test.cuda())
    
    alphas = torch.exp(logalphas)
    zb = (alphas * z).sum(1)
    zb,logzerr  = zb.detach().cpu().numpy(), logzerr.detach().cpu().numpy()

    df = pd.DataFrame(np.c_[refid_test,zb,zb_test], columns = ['ref_id','zb','zb_true'])
    df['rerr'] = (df.zb - df.zb_true) / (1 + df.zb_true)
    print('Bias',np.nanmedian(df.rerr), 'scatter', sigma68(df.rerr))
    
    df_all = pd.concat((df_all,df),0) 

Bias -0.002335593162031184 scatter 0.01995378630293039


  df_all = pd.concat((df_all,df),0)


Bias 0.0009012521632165858 scatter 0.018831616140777937
Bias 0.0005407008238211374 scatter 0.017458708015574746
Bias -0.0012457090213748446 scatter 0.019492615422443647
Bias -0.0009953453551965901 scatter 0.019509919618210143


In [52]:
df_bbz = df_all.copy()

## BB + NB + z

In [53]:
catalog_bb = ilbert[ilbert.target_zb != 0].reset_index()
catalog_nb = FAC_memba_flux[FAC_memba_flux.index.isin(catalog_bb.paudm_id)]
catalog_nb = 26-2.5*np.log10(catalog_nb)

In [54]:
df_all = pd.DataFrame()
cuts = np.linspace(0,len(catalog_bb),6)

for k in range(len(cuts)-1):
    BBNBz = MTLphotozs.mtl_photoz(zs = False, zs_NB_zb = True)
    catalog_test = catalog_bb.loc[cuts[k]:cuts[k+1]]
    catalog_training= catalog_bb[~catalog_bb.paudm_id.isin(catalog_test.paudm_id)]
    catalog_nb_train = catalog_nb[catalog_nb.index.isin(catalog_training.paudm_id)]
    
    training_loader = BBNBz.create_loader(catalog_bb,catalog_nb)
    BBNBznet = BBNBz.train_mtl(training_loader, epochs = 65)
    
    
    samps_BB_spec_test =  catalog_test[BB_list].values
    samps_BB_colors_test = samps_BB_spec_test[:,:-1] - samps_BB_spec_test[:,1:]
    samps_BB_colors_test = torch.Tensor(samps_BB_colors_test)
    zb_test = catalog_test.target_zb.values
    refid_test = catalog_test.paudm_id.values
    _,logalphas, z,logzerr = BBNBznet(samps_BB_colors_test.cuda())
    
    alphas = torch.exp(logalphas)
    zb = (alphas * z).sum(1)
    zb,logzerr  = zb.detach().cpu().numpy(), logzerr.detach().cpu().numpy()

    df = pd.DataFrame(np.c_[refid_test,zb,zb_test], columns = ['ref_id','zb','zb_true'])
    df['rerr'] = (df.zb - df.zb_true) / (1 + df.zb_true)
    print('Bias',np.nanmedian(df.rerr), 'scatter', sigma68(df.rerr))
    
    df_all = pd.concat((df_all,df),0) 

Bias 0.00012340900843473473 scatter 0.02044099648749069


  df_all = pd.concat((df_all,df),0)


Bias 0.00035453601239973605 scatter 0.01893829508270138
Bias 0.0011081242069249262 scatter 0.017504762054009734
Bias -0.0020263393815864584 scatter 0.019030670592370832
Bias 0.00027751560026899867 scatter 0.01884780445773831


In [58]:
df_bbnbz = df_all.copy()

In [60]:
df_bb.to_csv('/data/astro/scratch/lcabayol/Euclid/referee_report/catalogues/df_bb.csv', header = True, sep =',')
df_bbnb.to_csv('/data/astro/scratch/lcabayol/Euclid/referee_report/catalogues/df_bbnb.csv', header = True, sep =',')
df_bbz.to_csv('/data/astro/scratch/lcabayol/Euclid/referee_report/catalogues/df_bbz.csv', header = True, sep =',')
df_bbnbz.to_csv('/data/astro/scratch/lcabayol/Euclid/referee_report/catalogues/df_bbnbz.csv', header = True, sep =',')