# DP0 Photo-z Validation 

Ancillary notebook that explores the DP0 photo-z validation results and produces additional plots for the IC report.


---------------------------------------------------------------------------------

Processes on DES Science Portal:

| Pipeline | Product Log|
|---|:--:|
| Photo-z Training   | [7028](https://des-portal.linea.org.br/VP/getViewProcessCon?process_id=7028) |
| Photo-z Compute    | [7030](https://des-portal.linea.org.br/VP/getViewProcessCon?process_id=7030) |
| VAC DP0            | [7032](https://des-portal.linea.org.br/VP/getViewProcessCon?process_id=7032) |
| VAC DC2 (reference)| [7011](https://des-portal.linea.org.br/VP/getViewProcessCon?process_id=7011) |


_______________________________________________________________________________________

#### Imports

In [None]:
import numpy as np 
import matplotlib.pyplot as plt
import tables_io as io
%reload_ext autoreload
%autoreload 2

### Data
#### Photo-z Validation

In [None]:
# Validation-set file associated with process 7028.
valid_set_file = "/home/julia/ic-cluster-data/photoz_valid_1_._dnf_tsm_valid.fits"

# Read the file, convert it to a DataFrame and keep only rows with MAG_R < 25.
pz_valid = (
    io.read(valid_set_file)
    .to_pandas()
    .query("MAG_R < 25")
)

# Show the available columns as the cell output.
pz_valid.columns

In [None]:
# Number of non-null photo-z values in the magnitude-limited validation sample.
pz_valid["z_best"].count()

#### Photo-z compute

In [None]:
# query = "select mag_g, mag_r, mag_i, z_best from vac_cluster_simulation.catalog_7032"
# vac_dp0 = db.fetchall_df(query)
# query = "select mag_g, mag_r, mag_i, z_best from vac_cluster_simulation.catalog_7011"
# vac_dc2 = db.fetchall_df(query)

#### Photo-z x spec-z

In [None]:
%%time
plt.figure(figsize=[6,5])#,dpi=300)
plt.title("DP0 Photo-z Validation - DNF", fontsize=16)
plt.hexbin(pz_valid['Z_SPEC'], pz_valid['z_best'], None, mincnt=1, cmap='viridis', gridsize=[200,200], bins='log')
cbar = plt.colorbar()
#plt.clim(vmax=10)
plt.xlabel("$z_{spec}$", fontsize=16)
plt.ylabel("$z_{phot}$", fontsize=16)
plt.xlim(0,1.7)
plt.ylim(0,1.7)
plt.grid(True)
plt.tight_layout()

In [None]:
# Two-panel summary: photo-z vs spec-z scatter (left) and the two redshift
# distributions on a common binning (right).
fig, (ax_zz, ax_nz) = plt.subplots(1, 2, figsize=[12, 5])
fig.suptitle("DP0 Photo-z Validation - DNF", fontsize=16)

# Left panel: one point per object plus the dashed one-to-one line.
ax_zz.plot(pz_valid['Z_SPEC'], pz_valid['z_best'], 'k.', alpha=0.4)
ax_zz.plot([0, 3], [0, 3], 'r--', lw=2)
ax_zz.set_xlabel("spec-$z$", fontsize=16)
ax_zz.set_ylabel("photo-$z$", fontsize=16)
ax_zz.set_xlim(0, 2)
ax_zz.set_ylim(0, 2)
ax_zz.grid(True)

# Right panel: filled histogram for spec-z, red outline for photo-z.
bins = np.arange(0, 2, 0.04)
ax_nz.hist(pz_valid['Z_SPEC'], bins=bins, label="spec-$z$", color="k", alpha=0.5)
ax_nz.hist(pz_valid['z_best'], bins=bins, label="photo-$z$", histtype="step", ec="r", lw=2)
ax_nz.set_xlabel("redshift", fontsize=16)
ax_nz.set_ylabel("counts", fontsize=16)
ax_nz.set_xlim(0, 2)
ax_nz.legend()
ax_nz.grid(True)

fig.tight_layout()

### Photo-z Validation Metrics

In [None]:
def bias(zp, zs):
    """Return the mean of (zp - zs) / (1 + zs), rounded to 4 decimals.

    Parameters
    ----------
    zp, zs : array-like
        Values to compare element-wise; the notebook passes photo-z as
        ``zp`` and spec-z as ``zs``.
    """
    normalized_residual = (zp - zs) / (1. + zs)
    mean_residual = np.mean(normalized_residual)
    return np.round(mean_residual, 4)

def scatter(zp, zs):
    """Return the standard deviation of (zp - zs) / (1 + zs), rounded to 4 decimals.

    Parameters
    ----------
    zp, zs : array-like
        Values to compare element-wise; the notebook passes photo-z as
        ``zp`` and spec-z as ``zs``.
    """
    normalized_residual = (zp - zs) / (1. + zs)
    spread = np.std(normalized_residual)
    return np.round(spread, 4)

def metrics(zp, zs):
    """Print the bias and the scatter of ``zp`` with respect to ``zs``.

    Both values come from the ``bias`` and ``scatter`` helpers and are
    printed one per line; nothing is returned.
    """
    bias_value = bias(zp, zs)
    print(f"Bias:    {bias_value}")
    scatter_value = scatter(zp, zs)
    print(f"Scatter: {scatter_value}")

In [None]:
# Print bias and scatter for the whole validation sample.
metrics(zp=pz_valid['z_best'], zs=pz_valid['Z_SPEC'])

In [None]:
# Whole-sample metrics, kept in variables so later cells can show them in plot legends.
z_phot_all, z_spec_all = pz_valid['z_best'], pz_valid['Z_SPEC']
global_bias = bias(z_phot_all, z_spec_all)
global_scatter = scatter(z_phot_all, z_spec_all)

In [None]:
# Redshift bin edges of width 0.1 starting at 0 (np.arange excludes the stop value 2).
zbin = np.arange(0, 2, 0.1)

# Bin centres computed directly from adjacent edges. This guarantees exactly
# one centre per bin (len(zbin) - 1 values); a second float-step np.arange
# can gain or lose an element depending on floating-point rounding.
zmid = 0.5 * (zbin[:-1] + zbin[1:])
zmid

In [None]:
# Bias and scatter inside each spec-z bin (lower edge exclusive, upper edge inclusive).
bias_per_z = []
scatter_per_z = []
for z_lo, z_hi in zip(zbin[:-1], zbin[1:]):
    in_bin = (pz_valid['Z_SPEC'] > z_lo) & (pz_valid['Z_SPEC'] <= z_hi)
    zp_bin = pz_valid['z_best'][in_bin]
    zs_bin = pz_valid['Z_SPEC'][in_bin]
    bias_per_z.append(bias(zp_bin, zs_bin))
    scatter_per_z.append(scatter(zp_bin, zs_bin))

In [None]:
# Photo-z residual (photo-z minus spec-z) as a function of spec-z, with a
# dashed zero line for reference.
fig, ax = plt.subplots(figsize=[6, 5])
delta_z = pz_valid['z_best'] - pz_valid['Z_SPEC']
ax.plot(pz_valid['Z_SPEC'], delta_z, 'k.', alpha=0.4)
ax.plot([0, 3], [0, 0], 'r--', lw=2)
ax.set_xlabel("spec-$z$", fontsize=16)
ax.set_ylabel("photo-$z$ - spec-$z$", fontsize=16)
ax.set_xlim(0, 2)
ax.set_ylim(-1, 1)
ax.grid(True)
fig.tight_layout()

In [None]:
# Two stacked panels with the per-bin metrics as a function of redshift:
# bias on top, scatter below. Each legend shows the whole-sample value.
plt.figure(figsize=[6, 4], dpi=300)

# Top panel: bias per redshift bin, with a dotted zero line for reference.
plt.subplot(211)
plt.plot(zmid, bias_per_z, "bo-", label="global $b_{z}$=" + f"{global_bias:.4f}")
plt.hlines(0, -1, 2, ls=":", color="k")
plt.legend()
plt.xlim(0, 2)
plt.ylabel("$b_{z}$", fontsize=14)
plt.grid(True)

# Bottom panel: scatter per redshift bin.
# The \sigma labels are raw strings: in a normal string literal "\s" is an
# invalid escape sequence, which Python reports with a warning.
plt.subplot(212)
plt.plot(zmid, scatter_per_z, "rs-", label=r"global $\sigma_{z}$=" + f"{global_scatter:.4f}")
plt.legend()
plt.xlim(0.0, 2)
plt.xlabel("redshift", fontsize=14)
plt.ylabel(r"$\sigma_{z}$", fontsize=14)
plt.grid(True)

plt.tight_layout()


### Subset of low-z objects (spec-z < 0.4) with overestimated photo-z (photo-z − spec-z > 0.2)

In [None]:
# Objects with spec-z below 0.4 whose photo-z exceeds the spec-z by more than 0.2.
low_z_overestimated = "Z_SPEC < 0.4 & (z_best - Z_SPEC) > 0.2"
subset = pz_valid.query(low_z_overestimated)

In [None]:
# Residual plot for the full sample (translucent black) with the selected
# subset over-plotted in red.
fig, ax = plt.subplots(figsize=[6, 5])
for sample, marker, extra in ((pz_valid, 'k.', {'alpha': 0.4}), (subset, 'r.', {})):
    ax.plot(sample['Z_SPEC'], sample['z_best'] - sample['Z_SPEC'], marker, **extra)
ax.plot([0, 3], [0, 0], 'r--', lw=2)
ax.set_xlabel("spec-$z$", fontsize=16)
ax.set_ylabel("photo-$z$ - spec-$z$", fontsize=16)
ax.set_xlim(0, 2)
ax.set_ylim(-1, 1)
ax.grid(True)
fig.tight_layout()

In [None]:
# Same two-panel summary as for the full sample, now highlighting the
# selected subset in red in both panels.
fig, (ax_zz, ax_nz) = plt.subplots(1, 2, figsize=[12, 5])
fig.suptitle("DP0 Photo-z Validation - DNF", fontsize=16)

# Left panel: photo-z vs spec-z with the dashed one-to-one line.
ax_zz.plot(pz_valid['Z_SPEC'], pz_valid['z_best'], 'k.', alpha=0.4)
ax_zz.plot(subset['Z_SPEC'], subset['z_best'], 'r.')
ax_zz.plot([0, 3], [0, 3], 'r--', lw=2)
ax_zz.set_xlabel("spec-$z$", fontsize=16)
ax_zz.set_ylabel("photo-$z$", fontsize=16)
ax_zz.set_xlim(0, 2)
ax_zz.set_ylim(0, 2)
ax_zz.grid(True)

# Right panel: redshift histograms on a shared binning.
bins = np.arange(0, 2, 0.04)
ax_nz.hist(pz_valid['Z_SPEC'], bins=bins, label="spec-$z$", color="k", alpha=0.5)
ax_nz.hist(pz_valid['z_best'], bins=bins, label="photo-$z$", histtype="step", ec="g", lw=2)
ax_nz.hist(subset['z_best'], bins=bins, label="$z<0.4$ w/ wrong photo-$z$", histtype="step", ec="r", lw=2)
ax_nz.set_xlabel("redshift", fontsize=16)
ax_nz.set_ylabel("counts", fontsize=16)
ax_nz.set_xlim(0, 2)
ax_nz.legend()
ax_nz.grid(True)

fig.tight_layout()

In [None]:
# r-i colour against r magnitude: full sample in translucent black, selected
# subset over-plotted in red.
fig, ax = plt.subplots(figsize=[6, 5])
for sample, marker, extra in ((pz_valid, 'k.', {'alpha': 0.4}), (subset, 'r.', {})):
    ax.plot(sample['MAG_R'], sample['MAG_R'] - sample['MAG_I'], marker, **extra)
ax.set_xlabel("mag $r$", fontsize=16)
ax.set_ylabel("$r-i$", fontsize=16)
ax.set_xlim(18, 25)
ax.set_ylim(-0.5, 1.5)
ax.grid(True)
fig.tight_layout()