# Test Delight Apply

- author : Sylvie Dagoret-Campagne

- affiliation : IJCLab/IN2P3/CNRS
- creation date : March 17th 2021
- update : April 4th 2021

In [1]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats
import sys
sys.path.append('../')
from delight.io import *
from delight.utils import *
from delight.photoz_gp import PhotozGP
from delight.utils_cy import approx_flux_likelihood_cy
from time import time

In [2]:
from mpi4py import MPI
from delight.photoz_kernels import Photoz_mean_function, Photoz_kernel

In [3]:
logger = logging.getLogger(__name__)
coloredlogs.install(level='DEBUG', logger=logger,fmt='%(asctime)s,%(msecs)03d %(programname)s, %(name)s[%(process)d] %(levelname)s %(message)s')

In [4]:
plt.rcParams["figure.figsize"] = (6,4)
plt.rcParams["axes.labelsize"] = 'xx-large'
plt.rcParams['axes.titlesize'] = 'xx-large'
plt.rcParams['xtick.labelsize']= 'xx-large'
plt.rcParams['ytick.labelsize']= 'xx-large'

# Configuration

- select if want to control Delight internal simulation or DC2

In [5]:
FLAG_DC2=True

In [6]:
if FLAG_DC2:
    configfilename='tmp/parametersTest.cfg'
else:
    configfilename='tmpsim/parametersTest.cfg'

# Load config and data

In [7]:
params = parseParamFile(configfilename, verbose=False)


comm = MPI.COMM_WORLD
threadNum = comm.Get_rank()
numThreads = comm.Get_size()


if threadNum == 0:
    #print("--- DELIGHT-APPLY ---")
    logger.info("--- DELIGHT-APPLY ---")



2021-04-07 18:48:59,311 ipykernel_launcher.py, __main__[399] INFO --- DELIGHT-APPLY ---


In [8]:
 # Read filter coefficients, compute normalization of filters
bandCoefAmplitudes, bandCoefPositions, bandCoefWidths, norms = readBandCoefficients(params)
numBands = bandCoefAmplitudes.shape[0]

redshiftDistGrid, redshiftGrid, redshiftGridGP = createGrids(params)
f_mod_interp = readSEDs(params)
nt = f_mod_interp.shape[0]
nz = redshiftGrid.size

dir_seds = params['templates_directory']
dir_filters = params['bands_directory']
lambdaRef = params['lambdaRef']
sed_names = params['templates_names']
f_mod_grid = np.zeros((redshiftGrid.size, len(sed_names),len(params['bandNames'])))


for t, sed_name in enumerate(sed_names):
    f_mod_grid[:, t, :] = np.loadtxt(dir_seds + '/' + sed_name +'_fluxredshiftmod.txt')

numZbins = redshiftDistGrid.size - 1
numZ = redshiftGrid.size

numObjectsTraining = np.sum(1 for line in open(params['training_catFile']))
numObjectsTarget = np.sum(1 for line in open(params['target_catFile']))
redshiftsInTarget = ('redshift' in params['target_bandOrder'])
Ncompress = params['Ncompress']

firstLine = int(threadNum * numObjectsTarget / float(numThreads))
lastLine = int(min(numObjectsTarget,(threadNum + 1) * numObjectsTarget / float(numThreads)))
numLines = lastLine - firstLine

if threadNum == 0:
    msg= 'Number of Training Objects ' +  str(numObjectsTraining)
    logger.info(msg)

    msg='Number of Target Objects ' + str(numObjectsTarget)
    logger.info(msg)

comm.Barrier()

msg= 'Thread '+ str(threadNum) + ' , analyzes lines ' +  str(firstLine) + ' to ' + str( lastLine)
logger.info(msg)

  numObjectsTraining = np.sum(1 for line in open(params['training_catFile']))
  numObjectsTarget = np.sum(1 for line in open(params['target_catFile']))
2021-04-07 18:48:59,489 ipykernel_launcher.py, __main__[399] INFO Number of Training Objects 3755
2021-04-07 18:48:59,490 ipykernel_launcher.py, __main__[399] INFO Number of Target Objects 10692
2021-04-07 18:48:59,491 ipykernel_launcher.py, __main__[399] INFO Thread 0 , analyzes lines 0 to 10692


In [9]:
DL = approx_DL()
gp = PhotozGP(f_mod_interp,\
              bandCoefAmplitudes, bandCoefPositions, bandCoefWidths,\
              params['lines_pos'], params['lines_width'],\
              params['V_C'], params['V_L'],\
              params['alpha_C'], params['alpha_L'],\
              redshiftGridGP, use_interpolators=True)

In [10]:
# Create local files to store results
numMetrics = 7 + len(params['confidenceLevels'])
localPDFs = np.zeros((numLines, numZ))
localMetrics = np.zeros((numLines, numMetrics))
localCompressIndices = np.zeros((numLines,  Ncompress), dtype=int)
localCompEvidences = np.zeros((numLines,  Ncompress))

In [None]:
# Looping over chunks of the training set to prepare model predictions over z
numChunks = params['training_numChunks']
for chunk in range(numChunks):
    TR_firstLine = int(chunk * numObjectsTraining / float(numChunks))
    TR_lastLine = int(min(numObjectsTraining, (chunk + 1) * numObjectsTarget / float(numChunks)))
    targetIndices = np.arange(TR_firstLine, TR_lastLine)
    numTObjCk = TR_lastLine - TR_firstLine
    redshifts = np.zeros((numTObjCk, ))
    model_mean = np.zeros((numZ, numTObjCk, numBands))
    model_covar = np.zeros((numZ, numTObjCk, numBands))
    bestTypes = np.zeros((numTObjCk, ), dtype=int)
    ells = np.zeros((numTObjCk, ), dtype=int)

    loc = TR_firstLine - 1
    trainingDataIter = getDataFromFile(params, TR_firstLine, TR_lastLine,prefix="training_", ftype="gpparams")

    # loop o training data and training GP coefficient 
    for loc, (z, ell, bands, X, B, flatarray) in enumerate(trainingDataIter):
        t1 = time()
        redshifts[loc] = z
        gp.setCore(X, B, nt,flatarray[0:nt+B+B*(B+1)//2])
        bestTypes[loc] = gp.bestType
        ells[loc] = ell
        model_mean[:, loc, :], model_covar[:, loc, :] = gp.predictAndInterpolate(redshiftGrid, ell=ell)
        t2 = time()
        # print(loc, t2-t1)

    # p_t = params['p_t'][bestTypes][None, :]
    # p_z_t = params['p_z_t'][bestTypes][None, :]
    prior = np.exp(-0.5*((redshiftGrid[:, None]-redshifts[None, :]) /params['zPriorSigma'])**2)
    # prior[prior < 1e-6] = 0
    # prior *= p_t * redshiftGrid[:, None] *
    # np.exp(-0.5 * redshiftGrid[:, None]**2 / p_z_t) / p_z_t

    if params['useCompression'] and params['compressionFilesFound']:
        fC = open(params['compressMargLikFile'])
        fCI = open(params['compressIndicesFile'])
        itCompM = itertools.islice(fC, firstLine, lastLine)
        iterCompI = itertools.islice(fCI, firstLine, lastLine)

    targetDataIter = getDataFromFile(params, firstLine, lastLine,prefix="target_", getXY=False, CV=False)
    # loop on target 
    for loc, (z, normedRefFlux, bands, fluxes, fluxesVar, bCV, dCV, dVCV) in enumerate(targetDataIter):
        t1 = time()
        ell_hat_z = normedRefFlux * 4 * np.pi * params['fluxLuminosityNorm'] * (DL(redshiftGrid)**2. * (1+redshiftGrid))
        ell_hat_z[:] = 1
        if params['useCompression'] and params['compressionFilesFound']:
            indices = np.array(next(iterCompI).split(' '), dtype=int)
            sel = np.in1d(targetIndices, indices, assume_unique=True)
            
            like_grid2 = approx_flux_likelihood(fluxes,fluxesVar,model_mean[:, sel, :][:, :, bands],\
            f_mod_covar=model_covar[:, sel, :][:, :, bands],\
            marginalizeEll=True, normalized=False,\
            ell_hat=ell_hat_z,\
            ell_var=(ell_hat_z*params['ellPriorSigma'])**2)
            
            like_grid *= prior[:, sel]
        else:
            like_grid = np.zeros((nz, model_mean.shape[1]))
            approx_flux_likelihood_cy(\
                    like_grid, nz, model_mean.shape[1], bands.size,\
                    fluxes, fluxesVar,\
                    model_mean[:, :, bands],\
                    model_covar[:, :, bands],\
                    ell_hat=ell_hat_z,\
                    ell_var=(ell_hat_z*params['ellPriorSigma'])**2)
            like_grid *= prior[:, :]
        t2 = time()
        localPDFs[loc, :] += like_grid.sum(axis=1)
        evidences = np.trapz(like_grid, x=redshiftGrid, axis=0)
        t3 = time()

        if params['useCompression'] and not params['compressionFilesFound']:
            if localCompressIndices[loc, :].sum() == 0:
                sortind = np.argsort(evidences)[::-1][0:Ncompress]
                localCompressIndices[loc, :] = targetIndices[sortind]
                localCompEvidences[loc, :] = evidences[sortind]
            else:
                dind = np.concatenate((targetIndices,localCompressIndices[loc, :]))
                devi = np.concatenate((evidences,localCompEvidences[loc, :]))
                sortind = np.argsort(devi)[::-1][0:Ncompress]
                localCompressIndices[loc, :] = dind[sortind]
                localCompEvidences[loc, :] = devi[sortind]

        if chunk == numChunks - 1\
                and redshiftsInTarget\
                and localPDFs[loc, :].sum() > 0:
            localMetrics[loc, :] = computeMetrics(z, redshiftGrid,localPDFs[loc, :],params['confidenceLevels'])
        t4 = time()
        if loc % 100 == 0:
            print(loc, t2-t1, t3-t2, t4-t3)

    if params['useCompression'] and params['compressionFilesFound']:
        fC.close()
        fCI.close()


0 6.0093302726745605 0.3276803493499756 0.07976293563842773


In [None]:
comm.Barrier()

if threadNum == 0:
    globalPDFs = np.zeros((numObjectsTarget, numZ))
    globalCompressIndices = np.zeros((numObjectsTarget, Ncompress), dtype=int)
    globalCompEvidences = np.zeros((numObjectsTarget, Ncompress))
    globalMetrics = np.zeros((numObjectsTarget, numMetrics))
else:
    globalPDFs = None
    globalCompressIndices = None
    globalCompEvidences = None
    globalMetrics = None

firstLines = [int(k*numObjectsTarget/numThreads) for k in range(numThreads)]
lastLines = [int(min(numObjectsTarget, (k+1)*numObjectsTarget/numThreads)) for k in range(numThreads)]
numLines = [lastLines[k] - firstLines[k] for k in range(numThreads)]

sendcounts = tuple([numLines[k] * numZ for k in range(numThreads)])
displacements = tuple([firstLines[k] * numZ for k in range(numThreads)])
comm.Gatherv(localPDFs,[globalPDFs, sendcounts, displacements, MPI.DOUBLE])

sendcounts = tuple([numLines[k] * Ncompress for k in range(numThreads)])
displacements = tuple([firstLines[k] * Ncompress for k in range(numThreads)])
comm.Gatherv(localCompressIndices,[globalCompressIndices, sendcounts, displacements, MPI.LONG])
comm.Gatherv(localCompEvidences,[globalCompEvidences, sendcounts, displacements, MPI.DOUBLE])
comm.Barrier()

sendcounts = tuple([numLines[k] * numMetrics for k in range(numThreads)])
displacements = tuple([firstLines[k] * numMetrics for k in range(numThreads)])
comm.Gatherv(localMetrics,[globalMetrics, sendcounts, displacements, MPI.DOUBLE])

comm.Barrier()

if threadNum == 0:
    fmt = '%.2e'
    fname = params['redshiftpdfFileComp'] if params['compressionFilesFound'] else params['redshiftpdfFile']
    np.savetxt(fname, globalPDFs, fmt=fmt)
    if redshiftsInTarget:
        np.savetxt(params['metricsFile'], globalMetrics, fmt=fmt)
    if params['useCompression'] and not params['compressionFilesFound']:
        np.savetxt(params['compressMargLikFile'],globalCompEvidences, fmt=fmt)
        np.savetxt(params['compressIndicesFile'],globalCompressIndices, fmt="%i")