In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import torch
import os

from torch import nn, optim
from torch.optim import lr_scheduler
import time


In [2]:
def sigma68(data): return 0.5*(pd.Series(data).quantile(q = 0.84) - pd.Series(data).quantile(q = 0.16))

# TEST CFHT

In [3]:
d = '/data/astro/scratch/eriksen/data/vipers'
df_in = pd.read_csv(os.path.join(d,'vipers_full.csv'), comment='#')
df_in = df_in[df_in.alpha < 50]
df_in = df_in.rename(columns={'alpha': 'ra', 'delta': 'dec'})

In [5]:
from sqlalchemy.engine import create_engine
from sqlalchemy import *
from sqlalchemy.orm import *

engine = create_engine('postgresql://readonly@db03.pau.pic.es/dm')

query = f"""SELECT cm.paudm_id, cm.alpha_j2000, cm.delta_j2000, cm.mag_u, cm.mag_g, cm.mag_r, cm.mag_i, cm.mag_y, cm.mag_z, cm.magerr_u, cm.magerr_g, cm.magerr_r, cm.magerr_i, cm.magerr_y, cm.magerr_z
FROM cfhtlens as cm
WHERE cm.alpha_j2000 <50
AND cm.delta_j2000 < 0"""

with engine.begin() as conn:
    conn.execute("SET TRANSACTION ISOLATION LEVEL SERIALIZABLE READ ONLY DEFERRABLE")
    cfht_cat = pd.read_sql(query, conn)


In [6]:
cfht_cat = cfht_cat.rename(columns={'alpha_j2000': 'ra', 'delta_j2000': 'dec'})

In [7]:
from sklearn.neighbors import KDTree
inst = KDTree(cfht_cat[['ra', 'dec']])
dist, ind = inst.query(df_in[['ra', 'dec']])

In [8]:
index_name = cfht_cat.index.name
mapping = pd.DataFrame({'dist': dist[:, 0],
                        index_name: cfht_cat.index[ind[:, 0]],
                        'match_id': df_in.index,
                        'zspec': df_in.zspec, })
mapping = mapping.set_index(index_name)

In [9]:
max_dist = 1 / 3600.
mapping = mapping[mapping.dist < max_dist]

In [10]:
mapping = mapping.reset_index().drop_duplicates(['index'])

In [11]:
cfht_cat['zspec']= 0
cfht_cat.loc[mapping.loc[:,'index'],'zspec'] = mapping.zspec.values

In [12]:
cfht_cat = cfht_cat[~((np.abs(cfht_cat.mag_i)>50)&(np.abs(cfht_cat.mag_y)>50))]

In [13]:
mag_iy =  cfht_cat.mag_i
mag_iy = np.where(mag_iy<0,cfht_cat.mag_y,mag_iy)

In [14]:
magerr_iy =  cfht_cat.magerr_i
magerr_iy = np.where(magerr_iy<0,cfht_cat.magerr_y,magerr_iy)

In [15]:
cfht_cat['mag_iy'] = mag_iy

cfht_cat['magerr_iy'] = magerr_iy

In [16]:
cfht_cat = cfht_cat[cfht_cat.mag_iy>0]

In [17]:
cfht_cat=cfht_cat.drop(columns=['mag_i','mag_y','magerr_i','magerr_y']).rename(columns ={'magerr_iy':'magerr_i', 'mag_iy':'mag_i'})

In [18]:
cfht_cat = cfht_cat.dropna()

In [19]:
cfht_cat[['mag_u','mag_u','mag_r','mag_i','mag_z']] = cfht_cat[['mag_u','mag_g','mag_r','mag_i','mag_z']].replace(99, np.nan)
cfht_cat[['mag_u','mag_g','mag_r','mag_i','mag_z']] = cfht_cat[['mag_u','mag_g','mag_r','mag_i','mag_z']].replace(-99, np.nan)
cfht_cat[['magerr_u','magerr_g','magerr_r','magerr_i','magerr_z']] = cfht_cat[['magerr_u','magerr_g','magerr_r','magerr_i','magerr_z']].replace(-99, np.nan)
cfht_cat[['magerr_u','magerr_g','magerr_r','magerr_i','magerr_z']] = cfht_cat[['magerr_u','magerr_g','magerr_r','magerr_i','magerr_z']].replace(99, np.nan)

In [20]:
cfht_cat_zs = cfht_cat[cfht_cat.zspec!= 0]

In [21]:
NB_list = ['NB%s'%nb for nb in np.arange(455,855,10)]
BB_list = ['mag_u','mag_g','mag_r','mag_i','mag_z']

In [22]:
from sqlalchemy.engine import create_engine
from sqlalchemy import *
from sqlalchemy.orm import *

engine = create_engine('postgresql://readonly@db03.pau.pic.es/dm')

query = f"""SELECT fac.ref_id, fac.band, fac.flux, fac.flux_error
from forced_aperture_coadd as fac
WHERE fac.production_id=999"""

with engine.begin() as conn:
    conn.execute("SET TRANSACTION ISOLATION LEVEL SERIALIZABLE READ ONLY DEFERRABLE")
    coaddspaudm = pd.read_sql(query, conn)


In [23]:
from sqlalchemy.engine import create_engine
from sqlalchemy import *
from sqlalchemy.orm import *

engine = create_engine('postgresql://readonly@db03.pau.pic.es/dm')

query = f"""SELECT bcnz.ref_id, bcnz.zb, bcnz.zb_mean, bcnz.n_band
from photoz_bcnz as bcnz
WHERE bcnz.production_id=1017"""

with engine.begin() as conn:
    conn.execute("SET TRANSACTION ISOLATION LEVEL SERIALIZABLE READ ONLY DEFERRABLE")
    bcnz_memba = pd.read_sql(query, conn)


In [28]:
bcnz_memba=bcnz_memba.drop_duplicates('ref_id')

In [30]:
cfht_cat_paus = cfht_cat.merge(bcnz_memba, left_on = 'paudm_id', right_on = 'ref_id')
cfht_cat_paus = cfht_cat_paus.drop_duplicates('paudm_id')

In [31]:
cfht_cat_train = cfht_cat_paus[cfht_cat_paus.ra <34]
cfht_cat_test = cfht_cat_paus[(cfht_cat_paus.ra >34)&(cfht_cat_paus.zspec!= 0)]

In [32]:
cfht_cat_train['target_zs'] = cfht_cat_train.zspec
target_zb = cfht_cat_train.zspec
target_zb = np.where(target_zb==0,cfht_cat_train.zb,target_zb)
cfht_cat_train['target_zb'] = target_zb


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cfht_cat_train['target_zs'] = cfht_cat_train.zspec
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cfht_cat_train['target_zb'] = target_zb


In [33]:
cfht_cat_train = cfht_cat_train[['paudm_id','mag_u','mag_g','mag_r','mag_i','mag_z','target_zs','target_zb']]

In [35]:
cfht_cat_train = cfht_cat_train.dropna()

In [37]:
sample_test = cfht_cat_paus[cfht_cat_paus.zspec!=0].sample(10000)

In [None]:
coaddspaudm = coaddspaudm.pivot(index = 'ref_id', columns = 'band', values='flux').dropna()

In [53]:
cfht_cat_train = cfht_cat_train[cfht_cat_train.paudm_id.isin(coaddspaudm.index)]

## TRAIN

In [55]:
import MTLphotozs

## BB

In [56]:
catalog_bb = cfht_cat_train[cfht_cat_train.target_zs != 0].reset_index()
catalog_nb = coaddspaudm[coaddspaudm.index.isin(catalog_bb.paudm_id)]
catalog_nb = 26-2.5*np.log10(catalog_nb)

In [60]:
BB = MTLphotozs.mtl_photoz(zs = True, BB_list = BB_list)
training_loader = BB.create_loader(catalog_bb,catalog_nb)
BBnet = BB.train_mtl(training_loader, epochs = 65)

## BB + NB

In [62]:
catalog_bb = cfht_cat_train[cfht_cat_train.target_zb != 0].reset_index()
catalog_nb = coaddspaudm[coaddspaudm.index.isin(catalog_bb.paudm_id)]
catalog_nb = 26-2.5*np.log10(catalog_nb)

In [63]:
BBNB = MTLphotozs.mtl_photoz(zs = False, zs_NB = True, BB_list = BB_list)
training_loader = BBNB.create_loader(catalog_bb,catalog_nb)
BBNBnet = BBNB.train_mtl(training_loader, epochs = 65)

## BBz

In [None]:
catalog_bb = cfht_cat_train[cfht_cat_train.target_zb != 0].reset_index()
catalog_nb = coaddspaudm[coaddspaudm.index.isin(catalog_bb.paudm_id)]
catalog_nb = 26-2.5*np.log10(catalog_nb)

In [None]:
BBz = MTLphotozs.mtl_photoz(zs = False, zs_zb = True, BB_list = BB_list)
training_loader =BBzBBNB.create_loader(catalog_bb,catalog_nb)
BBznet = BBz.train_mtl(training_loader, epochs = 65)

## BB + NB + z

In [None]:
catalog_bb = cfht_cat_train[cfht_cat_train.target_zb != 0].reset_index()
catalog_nb = coaddspaudm[coaddspaudm.index.isin(catalog_bb.paudm_id)]
catalog_nb = 26-2.5*np.log10(catalog_nb)

In [None]:
BBz = MTLphotozs.mtl_photoz(zs = False, zs_NB_zb = True, BB_list = BB_list)
training_loader =BBzBBNB.create_loader(catalog_bb,catalog_nb)
BBznet = BBz.train_mtl(training_loader, epochs = 65)

# TEST

In [None]:
mags_test = sample_test[BB_list]
colors_test = mags_test[:,:-1] - mags_test[:,1:]
colors_bb = torch.Tensor(colors_test)

In [100]:
BBnet = BBnet.eval()
_, logalphas, z,logzerr = BBnet(colors_bb.cuda())
alphas = torch.exp(logalphas)
zb = (alphas * z).sum(1)
zb,logzerr  = zb.detach().cpu().numpy(), logzerr.detach().cpu().numpy()

df_bb = pd.DataFrame(np.c_[zb,zspec_test,mag_test], columns = ['zb_bb','zb_true','imag'])
df_bb['rerr_bb'] = (df_bb.zb_bb - df_bb.zb_true) / (1 + df_bb.zb_true)
print('Bias',np.nanmedian(df_bb.rerr_bb), 'scatter', sigma68(df_bb.rerr_bb))


torch.Size([10000, 4])

In [101]:
BBNBnet = BBNBnet.eval()
_, logalphas, z,logzerr = BBNBnet(colors_bb.cuda()) 
alphas = torch.exp(logalphas)
zb = (alphas * z).sum(1)
zb,logzerr  = zb.detach().cpu().numpy(), logzerr.detach().cpu().numpy()

df_bbnb = pd.DataFrame(np.c_[zb,zspec_test,mag_test], columns = ['zb_bbnb','zb_true','imag'])
df_bbnb['rerr_bbnb'] = (df_bbnb.zb_bbnb - df_bbnb.zb_true) / (1 + df_bbnb.zb_true)
print('Bias',np.nanmedian(df_bbnb.rerr_bbnb), 'scatter', sigma68(df_bbnb.rerr_bbnb))


RuntimeError: mat1 and mat2 shapes cannot be multiplied (10000x4 and 5x100)

In [None]:
BBznet = BBznet.eval()
_, logalphas, z,logzerr = BBznet(colors_bb.cuda()) 
alphas = torch.exp(logalphas)
zb = (alphas * z).sum(1)
zb,logzerr  = zb.detach().cpu().numpy(), logzerr.detach().cpu().numpy()

df_bbz = pd.DataFrame(np.c_[zb,zspec_test,mag_test], columns = ['zb_bbz','zb_true','imag'])
df_bbz['rerr_bbz'] = (df_bbz.zb_bbz - df_bbz.zb_true) / (1 + df_bbz.zb_true)
print('Bias',np.nanmedian(df_bbz.rerr_bbz), 'scatter', sigma68(df_bbz.rerr_bbz))


In [None]:
BBNBznet = BBNBznet.eval()
_, logalphas, z,logzerr = BBNBznet(colors_bb.cuda()) 
alphas = torch.exp(logalphas)
zb = (alphas * z).sum(1)
zb,logzerr  = zb.detach().cpu().numpy(), logzerr.detach().cpu().numpy()

df_bbz = pd.DataFrame(np.c_[zb,zspec_test,mag_test], columns = ['zb_bbz','zb_true','imag'])
df_bbz['rerr_bbz'] = (df_bbz.zb_bbz - df_bbz.zb_true) / (1 + df_bbz.zb_true)
print('Bias',np.nanmedian(df_bbz.rerr_bbz), 'scatter', sigma68(df_bbz.rerr_bbz))
