In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import torch
import os

from torch import nn, optim
from torch.optim import lr_scheduler
import time


In [2]:
def sigma68(data): return 0.5*(pd.Series(data).quantile(q = 0.84) - pd.Series(data).quantile(q = 0.16))

In [3]:
from sqlalchemy.engine import create_engine
from sqlalchemy import *
from sqlalchemy.orm import *

engine = create_engine('postgresql://readonly@db03.pau.pic.es/dm')

query = f"""
    SELECT cm.paudm_id, cm.zspec, cm."I_auto", cm.ra, cm.dec, cm.conf, cm."Umag", cm."Bmag", cm."Vmag", cm."Rmag", cm."ICmag", cm."Zmag"
    FROM COSMOS as cm
    """

with engine.begin() as conn:
    conn.execute("SET TRANSACTION ISOLATION LEVEL SERIALIZABLE READ ONLY DEFERRABLE")
    ilbert = pd.read_sql(query, conn)


In [4]:
query = f"""
    SELECT fac.ref_id, fac.flux, fac.flux_error, fac.band
    FROM forced_aperture_coadd as fac
    WHERE fac.production_id = 948
    """

with engine.begin() as conn:
    conn.execute("SET TRANSACTION ISOLATION LEVEL SERIALIZABLE READ ONLY DEFERRABLE")
    FAC_memba = pd.read_sql(query, conn)

FAC_memba['flux'] = FAC_memba.flux / 0.625
FAC_memba['flux_error'] = FAC_memba.flux_error / 0.625

FAC_memba = FAC_memba.rename(columns = {'ref_id':'paudm_id'})

FAC_memba_flux = FAC_memba.pivot(index = 'paudm_id', columns = 'band', values = 'flux')


In [5]:
z = pd.read_csv('/data/astro/scratch/lcabayol/Euclid/cosmos_pz_cat_alex.csv', comment = '#', header = 0, sep = ',')
z = z.rename(columns = {'ref_id':'paudm_id'})
z = z.rename(columns = {'photoz':'zb_alex'})
z = z[z.zb_alex <1.5]
z = z.replace('N',np.nan, regex=True)
z = z.astype(np.float32)

In [6]:
ilbert = ilbert.merge(z[['paudm_id','zb_alex','zspec_mean']], on = ['paudm_id'])
ilbert = ilbert.fillna(0)


In [7]:

ilbert['target_zs'] = ilbert.zspec
ilbert.target_zs.where((ilbert.conf>3), 0, inplace = True)
ilbert.target_zs.where((ilbert.conf<5), 0, inplace = True)
ilbert.target_zs.where((ilbert.target_zs!=0), ilbert.zspec_mean, inplace = True)


In [8]:
ilbert = ilbert.fillna(0)

ilbert['target_zb'] = ilbert.zspec
ilbert.target_zb.where((ilbert.conf>3), 0, inplace = True)
ilbert.target_zb.where((ilbert.conf<5), 0, inplace = True)
ilbert.target_zb.where((ilbert.target_zb!=0), ilbert.zspec_mean, inplace = True)
ilbert.target_zb.where((ilbert.target_zb!=0), ilbert.zb_alex, inplace = True)


In [9]:
ilbert = ilbert[ilbert.zspec <1.5]
ilbert = ilbert[ilbert.target_zb <1.5]
ilbert = ilbert[ilbert.target_zs <1.5]

# TRAIN NETWORKS

In [17]:
NB_list = ['NB%s'%nb for nb in np.arange(455,855,10)]
BB_list = ['Umag','Bmag','Vmag','Rmag','ICmag','Zmag']

In [10]:
import MTLphotozs

## BB

In [61]:
catalog_bb = ilbert[ilbert.target_zs != 0].reset_index()
catalog_nb = FAC_memba_flux[FAC_memba_flux.index.isin(catalog_bb.paudm_id)]
catalog_nb = 26-2.5*np.log10(catalog_nb)

In [62]:
df_all = pd.DataFrame()
cuts = np.arange(0,12001,2000)

for k in range(len(cuts)-1):
    BB = MTLphotozs.mtl_photoz(zs = True)
    catalog_test = catalog_bb.loc[cuts[k]:cuts[k+1]]
    catalog_training= catalog_bb[~catalog_bb.paudm_id.isin(catalog_test.paudm_id)]
    catalog_nb_train = catalog_nb[catalog_nb.index.isin(catalog_training.paudm_id)]
    
    training_loader = BB.create_loader(catalog_training,catalog_nb_train)
    BBnet = BB.train_mtl(training_loader, epochs = 65)
    
    
    samps_BB_spec_test =  catalog_test[BB_list].values
    samps_BB_colors_test = samps_BB_spec_test[:,:-1] - samps_BB_spec_test[:,1:]
    samps_BB_colors_test = torch.Tensor(samps_BB_colors_test)
    zb_test = catalog_test.target_zs.values
    refid_test = catalog_test.paudm_id.values
    _,logalphas, z,logzerr = BBnet(samps_BB_colors_test.cuda())
    
    alphas = torch.exp(logalphas)
    zb = (alphas * z).sum(1)
    zb,logzerr  = zb.detach().cpu().numpy(), logzerr.detach().cpu().numpy()

    df = pd.DataFrame(np.c_[refid_test,zb,zb_test], columns = ['ref_id','zb','zb_true'])
    df['rerr'] = (df.zb - df.zb_true) / (1 + df.zb_true)
    print('Bias',np.nanmedian(df.rerr), 'scatter', sigma68(df.rerr))
    
    df_all = pd.concat((df_all,df),0) 

Bias 0.0011062346544466126 scatter 0.019286243211414346


  df_all = pd.concat((df_all,df),0)


Bias 0.002034141842501082 scatter 0.019297750239884694
Bias -0.0008375795495669301 scatter 0.01927020420762582
Bias 0.0011829641762256878 scatter 0.017623507839760244
Bias 0.0013760654789627016 scatter 0.01897631672919413
Bias -0.0011715691610996554 scatter 0.01969534836511853


In [63]:
catalog_bb2 = ilbert[(ilbert.target_zs == 0)&(ilbert.target_zb != 0)].reset_index()
samps_BB_spec_test = catalog_bb2[BB_list].values
zb_spec_test = catalog_bb2.target_zb.values
refid = catalog_bb2.paudm_id.values

samps_BB_colors_test = samps_BB_spec_test[:,:-1] - samps_BB_spec_test[:,1:]
samps_BB_colors_test = torch.Tensor(samps_BB_colors_test)

_,logalphas, z,logzerr = BBnet(samps_BB_colors_test.cuda())

alphas = torch.exp(logalphas)
zb = (alphas * z).sum(1)
zb,logzerr  = zb.detach().cpu().numpy(), logzerr.detach().cpu().numpy()

df = pd.DataFrame(np.c_[refid,zb,zb_spec_test], columns = ['ref_id','zb','zb_true'])
df['rerr'] = (df.zb - df.zb_true) / (1 + df.zb_true)

print('Bias',np.nanmedian(df.rerr), 'scatter', sigma68(df.rerr))

df_all = pd.concat((df_all,df),0) 

Bias -0.0036433075794583074 scatter 0.026762067422364304


  df_all = pd.concat((df_all,df),0)


In [64]:
df_bb = df_all.copy()

## BB + NB

In [65]:
catalog_bb = ilbert[ilbert.target_zb != 0].reset_index()
catalog_nb = FAC_memba_flux[FAC_memba_flux.index.isin(catalog_bb.paudm_id)]
catalog_nb = 26-2.5*np.log10(catalog_nb)

In [None]:
df_all = pd.DataFrame()
cuts = np.linspace(0,len(catalog_bb),6)

for k in range(len(cuts)-1):
    BBNB = MTLphotozs.mtl_photoz(zs = False, zs_NB = True)
    catalog_test = catalog_bb.loc[cuts[k]:cuts[k+1]]
    catalog_training= catalog_bb[~catalog_bb.paudm_id.isin(catalog_test.paudm_id)]
    catalog_nb_train = catalog_nb[catalog_nb.index.isin(catalog_training.paudm_id)]
    
    training_loader = BBNB.create_loader(catalog_training,catalog_nb_train)
    BBNBnet = BBNB.train_mtl(training_loader, epochs = 65)
    
    
    
    samps_BB_spec_test =  catalog_test[BB_list].values
    samps_BB_colors_test = samps_BB_spec_test[:,:-1] - samps_BB_spec_test[:,1:]
    samps_BB_colors_test = torch.Tensor(samps_BB_colors_test)
    zb_test = catalog_test.target_zb.values
    refid_test = catalog_test.paudm_id.values
    _,logalphas, z,logzerr = BBNBnet(samps_BB_colors_test.cuda())
    
    alphas = torch.exp(logalphas)
    zb = (alphas * z).sum(1)
    zb,logzerr  = zb.detach().cpu().numpy(), logzerr.detach().cpu().numpy()

    df = pd.DataFrame(np.c_[refid_test,zb,zb_test], columns = ['ref_id','zb','zb_true'])
    df['rerr'] = (df.zb - df.zb_true) / (1 + df.zb_true)
    print('Bias',np.nanmedian(df.rerr), 'scatter', sigma68(df.rerr))
    
    df_all = pd.concat((df_all,df),0) 

Bias -0.002611006219901784 scatter 0.023784662895227366


  df_all = pd.concat((df_all,df),0)


Bias -0.0011848530565138932 scatter 0.021597347438734774
Bias -0.0013833372383899316 scatter 0.0202078736109631


In [None]:
df_bbnb = df_all.copy()

## BB + z

In [None]:
catalog_bb = ilbert[ilbert.target_zb != 0].reset_index()
catalog_nb = FAC_memba_flux[FAC_memba_flux.index.isin(catalog_bb.paudm_id)]
catalog_nb = 26-2.5*np.log10(catalog_nb)

In [None]:
df_all = pd.DataFrame()
cuts = np.linspace(0,len(catalog_bb),6)

for k in range(len(cuts)-1):
    BBz = MTLphotozs.mtl_photoz(zs = False, zs_zb = True)
    catalog_test = catalog_bb.loc[cuts[k]:cuts[k+1]]
    catalog_training= catalog_bb[~catalog_bb.paudm_id.isin(catalog_test.paudm_id)]
    catalog_nb_train = catalog_nb[catalog_nb.index.isin(catalog_training.paudm_id)]
    
    training_loader = BBz.create_loader(catalog_training,catalog_nb_train)
    BBznet = BBz.train_mtl(training_loader, epochs = 65)
    
    
    samps_BB_spec_test =  catalog_test[BB_list].values
    samps_BB_colors_test = samps_BB_spec_test[:,:-1] - samps_BB_spec_test[:,1:]
    samps_BB_colors_test = torch.Tensor(samps_BB_colors_test)
    zb_test = catalog_test.target_zb.values
    refid_test = catalog_test.paudm_id.values
    _,logalphas, z,logzerr = BBznet(samps_BB_colors_test.cuda())
    
    alphas = torch.exp(logalphas)
    zb = (alphas * z).sum(1)
    zb,logzerr  = zb.detach().cpu().numpy(), logzerr.detach().cpu().numpy()

    df = pd.DataFrame(np.c_[refid_test,zb,zb_test], columns = ['ref_id','zb','zb_true'])
    df['rerr'] = (df.zb - df.zb_true) / (1 + df.zb_true)
    print('Bias',np.nanmedian(df.rerr), 'scatter', sigma68(df.rerr))
    
    df_all = pd.concat((df_all,df),0) 

In [None]:
df_bbz = df_all.copy()

## BB + NB + z

In [None]:
catalog_bb = ilbert[ilbert.target_zb != 0].reset_index()
catalog_nb = FAC_memba_flux[FAC_memba_flux.index.isin(catalog_bb.paudm_id)]
catalog_nb = 26-2.5*np.log10(catalog_nb)

In [None]:
df_all = pd.DataFrame()
cuts = np.linspace(0,len(catalog_bb),6)

for k in range(len(cuts)-1):
    BBNBz = MTLphotozs.mtl_photoz(zs = False, zs_NB_zb = True)
    catalog_test = catalog_bb.loc[cuts[k]:cuts[k+1]]
    catalog_training= catalog_bb[~catalog_bb.paudm_id.isin(catalog_test.paudm_id)]
    catalog_nb_train = catalog_nb[catalog_nb.index.isin(catalog_training.paudm_id)]
    
    training_loader = BBNBz.create_loader(catalog_training,catalog_nb_train)
    BBNBznet = BBNBz.train_mtl(training_loader, epochs = 65)
    
    
    samps_BB_spec_test =  catalog_test[BB_list].values
    samps_BB_colors_test = samps_BB_spec_test[:,:-1] - samps_BB_spec_test[:,1:]
    samps_BB_colors_test = torch.Tensor(samps_BB_colors_test)
    zb_test = catalog_test.target_zb.values
    refid_test = catalog_test.paudm_id.values
    _,logalphas, z,logzerr = BBNBznet(samps_BB_colors_test.cuda())
    
    alphas = torch.exp(logalphas)
    zb = (alphas * z).sum(1)
    zb,logzerr  = zb.detach().cpu().numpy(), logzerr.detach().cpu().numpy()

    df = pd.DataFrame(np.c_[refid_test,zb,zb_test], columns = ['ref_id','zb','zb_true'])
    df['rerr'] = (df.zb - df.zb_true) / (1 + df.zb_true)
    print('Bias',np.nanmedian(df.rerr), 'scatter', sigma68(df.rerr))
    
    df_all = pd.concat((df_all,df),0) 

In [None]:
df_bbnbz = df_all.copy()

In [None]:
df_bb.to_csv('/data/astro/scratch/lcabayol/Euclid/referee_report/catalogues/df_bb.csv', header = True, sep =',')
df_bbnb.to_csv('/data/astro/scratch/lcabayol/Euclid/referee_report/catalogues/df_bbnb.csv', header = True, sep =',')
df_bbz.to_csv('/data/astro/scratch/lcabayol/Euclid/referee_report/catalogues/df_bbz.csv', header = True, sep =',')
df_bbnbz.to_csv('/data/astro/scratch/lcabayol/Euclid/referee_report/catalogues/df_bbnbz.csv', header = True, sep =',')