# Make slurm files required to produce SXDS joint VISTA-HSC data product.

In this notebook we will make all the slurm files required to run the whole VISTA-VIDEO HSC-DUD joint photometry pipeline.

We need to find all the patches in the HSC imaging and produce a slurm pipeline file for every patch or group of patches.

This will be a maximum of around 4 tracts * 91 patches per tract = 364 patches

We will also need to set up the data directories including linking relevant reference catalogues and copying the required HSC data products which are already processed.

In [1]:
from astropy.table import Table, Column
import numpy as np
import glob

In [2]:
HSC_LOC = '../../dmu0/dmu0_HSC/data'

In [3]:
video_ims = Table.read('../../dmu1/data/video_images_overview_20200820.csv')
hsc_ims = Table.read('../../dmu1/data/hsc_images_overview.csv')

## 1 Find all the relevant HSC SXDS patches and corresponding VIDEO images.

The first stage is parallesised by ccd. We will create one job for every date. This should be small enough to fit in a 24hr job.

### 1.1 Get the HSC DUD files in SXDS

In [4]:
sxds_tracts = [8282,8283,8284,8523,8524,8525,8765,8766,8767] #manually got these from HSC DR2 pages

In [5]:
hsc_ims['tract'] = [f.split('/')[14] for f in hsc_ims['file']]
hsc_ims['patch'] = [f.split('/')[15] for f in hsc_ims['file']]
hsc_ims['depth'] = [f.split('/')[11] for f in hsc_ims['file']]

In [6]:
in_sxds = hsc_ims['depth'] == 'pdr2_dud' 
in_sxds &= np.isin([int(t) for t in hsc_ims['tract'] ],  sxds_tracts)

In [7]:
np.sum(in_sxds)

1629

In [8]:
hsc_ims[in_sxds][:5]

file,ra_0_0,ra_0_y,ra_x_0,ra_x_y,dec_0_0,dec_0_y,dec_x_0,dec_x_y,size,hash,tract,patch,depth
str164,float64,float64,float64,float64,float64,float64,float64,float64,int64,str32,str5,str3,str9
"/home/ir-shir1/rds/rds-iris-ip005/data/public/HSC/hsc-release.mtk.nao.ac.jp/archive/filetree/pdr2_dud/deepCoadd-results/HSC-Z/8765/0,0/calexp-HSC-Z-8765-0,0.fits",35.057390881574044,35.05720789563249,34.865503651256965,34.86536233311517,-4.558457292032212,-4.367177057413561,-4.558655394587739,-4.367366830731112,210075840,93e4be0f6ccbf4f282001d491f830060,8765,0,pdr2_dud
"/home/ir-shir1/rds/rds-iris-ip005/data/public/HSC/hsc-release.mtk.nao.ac.jp/archive/filetree/pdr2_dud/deepCoadd-results/HSC-Z/8765/2,2/calexp-HSC-Z-8765-2,2.fits",34.68744599488356,34.687340888143005,34.490935877105265,34.49087447536722,-4.19018894495259,-3.994204115965604,-4.190282501066741,-3.994293285659841,149581440,bb471cb691de84ed694a8179c780c4da,8765,22,pdr2_dud
"/home/ir-shir1/rds/rds-iris-ip005/data/public/HSC/hsc-release.mtk.nao.ac.jp/archive/filetree/pdr2_dud/deepCoadd-results/HSC-Z/8765/0,6/calexp-HSC-Z-8765-0,6.fits",35.05632532497644,35.05613835071305,34.864680733040856,34.864536334791445,-3.4433065839296444,-3.2473362428975334,-3.4434561168248923,-3.24747724027541,64304640,bddc980e7d6fc6007a039863e628abad,8765,6,pdr2_dud
"/home/ir-shir1/rds/rds-iris-ip005/data/public/HSC/hsc-release.mtk.nao.ac.jp/archive/filetree/pdr2_dud/deepCoadd-results/HSC-Z/8765/1,3/calexp-HSC-Z-8765-1,3.fits",34.87444848193645,34.87430182037414,34.677990464977505,34.67788748292052,-4.003408172084183,-3.807423421527772,-4.003542229270651,-3.807550898996197,130622400,28975127e02dcd2d11576f7aa3911d6b,8765,13,pdr2_dud
"/home/ir-shir1/rds/rds-iris-ip005/data/public/HSC/hsc-release.mtk.nao.ac.jp/archive/filetree/pdr2_dud/deepCoadd-results/HSC-Z/8765/3,0/calexp-HSC-Z-8765-3,0.fits",34.50041317080672,34.500351147036746,34.30381449491872,34.30379517560638,-4.558891715533787,-4.367593215541012,-4.558942620240526,-4.367641979954217,198616320,7939cb9d2256af91b2e55dbe826edacc,8765,30,pdr2_dud


### 1.2 Find VIDEO images containing those patches

To begin we simply find all tiles which contain the centres of any of those patches

In [9]:
video_ims.add_column(Column(
    data= [t.split('/')[-2] for t in video_ims['file']],
    name='date'))

In [10]:
def fileToType(filename):
    filetype = ''
    types = {
        'tile':'_tl.fit',
        'stack':'_st.fit',
    }
    for k,v in types.items():
        #print(k,v)
        if filename.endswith(v):
            filetype = k
       
    return filetype
video_ims['type'] = [fileToType(f) for f in video_ims['file']]

In [11]:
#TODO make more sophisticated overlap tester. Use patches?
#make list of patches for every tile?
near_sxds = video_ims['type'] == 'tile'
near_sxds &= video_ims['ra'] > 32 #generous bounding bos for simplicity 
near_sxds &= video_ims['ra'] < 39
near_sxds &= video_ims['dec'] > -8
near_sxds &= video_ims['dec'] < -1
#Just run Ks for now
near_sxds &= video_ims['filter'] == 'Ks'

  result = getattr(super(), op)(other)
  result = getattr(super(), op)(other)


In [12]:
np.sum(near_sxds)

146

In [13]:
video_ims[near_sxds][:5]

file,ra,dec,ra_0_0,ra_0_y,ra_x_0,ra_x_y,dec_0_0,dec_0_y,dec_x_0,dec_x_y,filter,size,hash,date,type
str98,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,str2,int64,str32,str8,str5
/home/ir-shir1/rds/rds-iris-ip005/data/private/VISTA/VIDEO/20091203/v20091203_00166_st_tl.fit,36.484112,-4.64021,37.18581681881602,37.18827867938782,35.961889541645284,35.96170595209148,-3.984669774357833,-5.47895872985137,-3.985268141489924,-5.479559099311809,Ks,217598400,a06a0cb6fa58f2f8e20b2d58802d2b23,20091203,tile
/home/ir-shir1/rds/rds-iris-ip005/data/private/VISTA/VIDEO/20091203/v20091203_00268_st_tl.fit,36.483812,-4.64101,37.18469529855007,37.187078110381336,35.96147340582927,35.96121968956605,-3.991145815762932,-5.480101505496474,-3.9914464481413,-5.480404009464727,Ks,219611520,3749772a2e4d599ee23e16f9ee01a127,20091203,tile
/home/ir-shir1/rds/rds-iris-ip005/data/private/VISTA/VIDEO/20091203/v20091203_00226_st_tl.fit,36.484412,-4.63991,37.18171950099135,37.18401293691172,35.963467899633834,35.96313025527346,-3.985356697324692,-5.478446527962703,-3.985829986453056,-5.478921528693908,Ks,218891520,53c11b8ae8361559a4df18e117915bd1,20091203,tile
/home/ir-shir1/rds/rds-iris-ip005/data/private/VISTA/VIDEO/20100910/v20100910_00606_st_tl.fit,36.488854,-4.64268,37.18393726892561,37.18848156033916,35.96285187809026,35.96476365853805,-3.988288028193872,-5.478037426842484,-3.990614051393192,-5.480369114925005,Ks,214133760,28bc936c8f0c85c446e4dfa22b6c7768,20100910,tile
/home/ir-shir1/rds/rds-iris-ip005/data/private/VISTA/VIDEO/20100910/v20100910_00561_st_tl.fit,36.476458,-4.64362,37.182714564872846,37.18702680660112,35.96151091868087,35.96318593761777,-3.9868775809400456,-5.479183414777115,-3.9892291046049513,-5.481540192080265,Ks,213762240,b84da9427e1cbea4f8e2df100e9b5573,20100910,tile


In [14]:
video_ims[near_sxds].write('./data/sxds_tiles.csv')



In [15]:
#For simplicity lets ingest all the images (They are only links and this stage is fast)
#!mkdir data
#!mkdir slurm
#for date in date_list:
#    #!ingestImages.py data /path/to/vista/{date}/*[0-9].fit #Exposures
#    !ingestImages.py data /path/to/vista/{date}/*_st.fit #Stacks

In [16]:
#date_list = ['20121122', '20171027'] #test dates
date_list = np.unique(video_ims['date'][near_sxds])

In [17]:
date_list = [date[0:4]+'-'+date[4:6]+'-'+date[6:9] for date in date_list]

In [18]:
date_list

['2009-11-05',
 '2009-11-06',
 '2009-12-03',
 '2009-12-04',
 '2009-12-20',
 '2009-12-21',
 '2009-12-22',
 '2009-12-24',
 '2009-12-29',
 '2010-01-03',
 '2010-01-12',
 '2010-01-16',
 '2010-01-17',
 '2010-01-26',
 '2010-01-28',
 '2010-01-29',
 '2010-01-30',
 '2010-01-31',
 '2010-02-02',
 '2010-08-15',
 '2010-08-24',
 '2010-09-09',
 '2010-09-10',
 '2010-09-11',
 '2010-09-13',
 '2010-09-14',
 '2010-09-15',
 '2010-09-19',
 '2010-09-20',
 '2010-09-22',
 '2010-09-23',
 '2010-12-19',
 '2010-12-20',
 '2010-12-22',
 '2010-12-27',
 '2011-10-02',
 '2011-10-08',
 '2011-11-04',
 '2011-11-05',
 '2011-11-15',
 '2011-11-24',
 '2011-11-26',
 '2012-09-16',
 '2012-10-17',
 '2012-10-18',
 '2012-10-21',
 '2012-11-02',
 '2012-11-03',
 '2012-11-08',
 '2012-11-17',
 '2012-11-21',
 '2012-11-22',
 '2012-11-27',
 '2012-11-29',
 '2012-11-30',
 '2012-12-01',
 '2012-12-02',
 '2012-12-03',
 '2012-12-04',
 '2012-12-05',
 '2012-12-06',
 '2013-11-07',
 '2013-11-12',
 '2013-11-27',
 '2013-11-30',
 '2013-12-05',
 '2013-12-

## 2 Process CCDs

This stage is parallelised accroding to the raw files ingested. We are going to make one job per date

In [19]:
slurm_template= """
#!/bin/bash
#!
#! Example SLURM job script for Peta4-Skylake (Skylake CPUs, OPA)
#! Last updated: Mon 13 Nov 12:25:17 GMT 2017
#!

#!#############################################################
#!#### Modify the options in this section as appropriate ######
#!#############################################################

#! sbatch directives begin here ###############################
#! Name of the job:
#SBATCH -J {job_name}
#! Which project should be charged:
#SBATCH -A IRIS-IP005-CPU
#! How many whole nodes should be allocated?
#SBATCH --nodes=1
#! How many (MPI) tasks will there be in total? (<= nodes*32)
#! The skylake/skylake-himem nodes have 32 CPUs (cores) each.
#SBATCH --ntasks=1
#! How much wallclock time will be required?
#SBATCH --time=36:00:00
#! What types of email messages do you wish to receive?
#SBATCH --mail-type=FAIL
#! Uncomment this to prevent the job from being requeued (e.g. if
#! interrupted by node failure or system downtime):
##SBATCH --no-requeue

#! For 6GB per CPU, set "-p skylake"; for 12GB per CPU, set "-p skylake-himem": 
#SBATCH -p skylake

#! sbatch directives end here (put any additional directives above this line)

#! Notes:
#! Charging is determined by core number*walltime.
#! The --ntasks value refers to the number of tasks to be launched by SLURM only. This
#! usually equates to the number of MPI tasks launched. Reduce this from nodes*32 if
#! demanded by memory requirements, or if OMP_NUM_THREADS>1.
#! Each task is allocated 1 core by default, and each core is allocated 5980MB (skylake)
#! and 12030MB (skylake-himem). If this is insufficient, also specify
#! --cpus-per-task and/or --mem (the latter specifies MB per node).

#! Number of nodes and tasks per node allocated by SLURM (do not change):
numnodes=$SLURM_JOB_NUM_NODES
numtasks=$SLURM_NTASKS
mpi_tasks_per_node=$(echo "$SLURM_TASKS_PER_NODE" | sed -e  's/^\([0-9][0-9]*\).*$/\1/')
#! ############################################################
#! Modify the settings below to specify the application's environment, location 
#! and launch method:

#! Optionally modify the environment seen by the application
#! (note that SLURM reproduces the environment at submission irrespective of ~/.bashrc):
. /etc/profile.d/modules.sh                # Leave this line (enables the module command)
module purge                               # Removes all modules still loaded
module load rhel7/default-peta4            # REQUIRED - loads the basic environment

#! Insert additional module load commands after this line if needed:

#! Full path to application executable: 
application="{sh_name}"

#! Run options for the application:
options=""

#! Work directory (i.e. where the job will run):
workdir="$SLURM_SUBMIT_DIR"  # The value of SLURM_SUBMIT_DIR sets workdir to the directory
                             # in which sbatch is run.

#! Are you using OpenMP (NB this is unrelated to OpenMPI)? If so increase this
#! safe value to no more than 32:
export OMP_NUM_THREADS=1

#! Number of MPI tasks to be started by the application per node and in total (do not change):
np=$[${{numnodes}}*${{mpi_tasks_per_node}}]

#! The following variables define a sensible pinning strategy for Intel MPI tasks -
#! this should be suitable for both pure MPI and hybrid MPI/OpenMP jobs:
export I_MPI_PIN_DOMAIN=omp:compact # Domains are $OMP_NUM_THREADS cores in size
export I_MPI_PIN_ORDER=scatter # Adjacent domains have minimal sharing of caches/sockets
#! Notes:
#! 1. These variables influence Intel MPI only.
#! 2. Domains are non-overlapping sets of cores which map 1-1 to MPI tasks.
#! 3. I_MPI_PIN_PROCESSOR_LIST is ignored if I_MPI_PIN_DOMAIN is set.
#! 4. If MPI tasks perform better when sharing caches/sockets, try I_MPI_PIN_ORDER=compact.


#! Uncomment one choice for CMD below (add mpirun/mpiexec options if necessary):

#! Choose this for a MPI code (possibly using OpenMP) using Intel MPI.
CMD="mpirun -ppn $mpi_tasks_per_node -np $np $application $options"

#! Choose this for a pure shared-memory OpenMP parallel program on a single node:
#! (OMP_NUM_THREADS threads will be created):
#CMD="$application $options"

#! Choose this for a MPI code (possibly using OpenMP) using OpenMPI:
#CMD="mpirun -npernode $mpi_tasks_per_node -np $np $application $options"


###############################################################
### You should not have to change anything below this line ####
###############################################################

cd $workdir
echo -e "Changed directory to `pwd`.\n"

JOBID=$SLURM_JOB_ID

echo -e "JobID: $JOBID\n======"
echo "Time: `date`"
echo "Running on master node: `hostname`"
echo "Current directory: `pwd`"

if [ "$SLURM_JOB_NODELIST" ]; then
        #! Create a machine file:
        export NODEFILE=`generate_pbs_nodefile`
        cat $NODEFILE | uniq > machine.file.$JOBID
        echo -e "\nNodes allocated:\n================"
        echo `cat machine.file.$JOBID | sed -e 's/\..*$//g'`
fi

echo -e "\nnumtasks=$numtasks, numnodes=$numnodes, mpi_tasks_per_node=$mpi_tasks_per_node (OMP_NUM_THREADS=$OMP_NUM_THREADS)"

echo -e "\nExecuting command:\n==================\n$CMD\n"

eval $CMD 

"""

In [20]:
sh_template = """
!/bin/bash
source /rfs/project/rfs-L33A9wsNuJk/shared/lsst_stack/loadLSST.bash
setup lsst_distrib
setup obs_vista
processCcd.py ../data --rerun processCcdOutputs --id obsDate={obsDate}
"""

In [21]:

for date in date_list:
    sh_name = "./slurm/processCcd_{}.sh".format(date)
    f_sh = open(sh_name, "a")
    f_sh.write(sh_template.format(obsDate=date))
    f_sh.close()
    f_slurm = open("./slurm/processCcd_{}.slurm".format(date), "a")
    f_slurm.write(slurm_template.format(
        job_name="processCcd_{}".format(date),
        sh_name=sh_name
        
    ))
    f_slurm.close()


In [22]:
#We can now submit these after the processCcd has run with
#qsub ./slurm/processCcd*.slurm

## 3 Run full patch
Make one shell script and slurm script for each patch

In [23]:
#HSC preprocessed files must be copied into place
#!cp /Users/rs548/GitHub/lsst-ir-fusion/dmu0/dmu0_HSC/data/hsc-release.mtk.nao.ac.jp/archive/filetree/pdr2_dud/deepCoadd-results/HSC-R data/rerun/coaddPhot/deepCoadd-results/HSC-R


In [24]:
patch_sh_template = """
!/bin/bash
source /rfs/project/rfs-L33A9wsNuJk/shared/lsst_stack/loadLSST.bash
setup lsst_distrib
setup obs_vista
#makeCoaddTempExp.py data --rerun coadd --selectId filter=VISTA-Y --id filter=VISTA-Y tract={tract} patch={patch} 
#makeCoaddTempExp.py data --rerun coadd --selectId filter=VISTA-Ks --id filter=VISTA-J tract={tract} patch={patch} 
#makeCoaddTempExp.py data --rerun coadd --selectId filter=VISTA-Y --id filter=VISTA-H tract={tract} patch={patch} 
makeCoaddTempExp.py data --rerun coadd --selectId filter=VISTA-Ks --id filter=VISTA-Ks tract={tract} patch={patch} 

#assembleCoadd.py data --rerun coadd --selectId filter=VISTA-Y --id filter=VISTA-Y tract={tract} patch={patch}
#assembleCoadd.py data --rerun coadd --selectId filter=VISTA-Ks --id filter=VISTA-J tract={tract} patch={patch}
#assembleCoadd.py data --rerun coadd --selectId filter=VISTA-Y --id filter=VISTA-H tract={tract} patch={patch}
assembleCoadd.py data --rerun coadd --selectId filter=VISTA-Ks --id filter=VISTA-Ks tract={tract} patch={patch}

#detectCoaddSources.py data --rerun coadd:coaddPhot --id filter=VISTA-Y tract={tract} patch={patch}
#detectCoaddSources.py data --rerun coadd:coaddPhot --id filter=VISTA-J tract={tract} patch={patch}
#detectCoaddSources.py data --rerun coadd:coaddPhot --id filter=VISTA-H tract={tract} patch={patch}
detectCoaddSources.py data --rerun coadd:coaddPhot --id filter=VISTA-Ks tract={tract} patch={patch}

#HSC files must be copied - Which bands to merge detections from?
#mergeCoaddDetections.py data --rerun coaddPhot --id filter=VISTA-Y^VISTA-J^VISTA-H^VISTA-Ks^HSC-G^HSC-R^HSC-I^HSC-Z^HSC-Y tract={tract} patch={patches}
mergeCoaddDetections.py data --rerun coaddPhot --id filter=VISTA-Ks tract={tract} patch={patches}

#deblendCoaddSources.py data --rerun coaddPhot --id filter=VISTA-Y tract={tract} patch={patch}
#deblendCoaddSources.py data --rerun coaddPhot --id filter=VISTA-J tract={tract} patch={patch}
#deblendCoaddSources.py data --rerun coaddPhot --id filter=VISTA-H tract={tract} patch={patch}
deblendCoaddSources.py data --rerun coaddPhot --id filter=VISTA-Ks tract={tract} patch={patch}
#deblendCoaddSources.py data --rerun coaddPhot --id filter=HSC-G tract={tract} patch={patch}
#deblendCoaddSources.py data --rerun coaddPhot --id filter=HSC-R tract={tract} patch={patch}
#deblendCoaddSources.py data --rerun coaddPhot --id filter=HSC-I tract={tract} patch={patch}
#deblendCoaddSources.py data --rerun coaddPhot --id filter=HSC-Z tract={tract} patch={patch}
#deblendCoaddSources.py data --rerun coaddPhot --id filter=HSC-Y tract={tract} patch={patch}

#measureCoaddSources.py data --rerun coaddPhot --id filter=VISTA-Y tract={tract} patch={patch}
#measureCoaddSources.py data --rerun coaddPhot --id filter=VISTA-J tract={tract} patch={patch}
#measureCoaddSources.py data --rerun coaddPhot --id filter=VISTA-H tract={tract} patch={patch}
measureCoaddSources.py data --rerun coaddPhot --id filter=VISTA-Ks tract={tract} patch={patch}
#measureCoaddSources.py data --rerun coaddPhot --id filter=HSC-G tract={tract} patch={patch}
#measureCoaddSources.py data --rerun coaddPhot --id filter=HSC-R tract={tract} patch={patch}
#measureCoaddSources.py data --rerun coaddPhot --id filter=HSC-I tract={tract} patch={patch}
#measureCoaddSources.py data --rerun coaddPhot --id filter=HSC-Z tract={tract} patch={patch}
#measureCoaddSources.py data --rerun coaddPhot --id filter=HSC-Y tract={tract} patch={patch}

mergeCoaddMeasurements.py data --rerun coaddPhot --id filter=VISTA-Ks tract={tract} patch={patch}

#forcedPhotCoadd.py data --rerun coaddPhot:coaddForcedPhot --id filter=VISTA-Y tract={tract} patch={patch}
forcedPhotCoadd.py data --rerun coaddForcedPhot --id filter=VISTA-Ks tract={tract} patch={patch}
#forcedPhotCoadd.py data --rerun coaddForcedPhot --id filter=HSC-R tract={tract} patch={patch}

"""

In [25]:

for im in hsc_ims[in_sxds]:
    tract = im['tract']
    patch = im['patch']
    sh_name = "./slurm/runPhotoPipe_{}_{}.sh".format(tract, patch)
    f_sh = open(sh_name, "a")
    f_sh.write(patch_sh_template.format(tract=tract, patch=patch))
    f_sh.close()
    f_slurm = open("./slurm/processCcd_{}.slurm".format(date), "a")
    f_slurm.write(slurm_template.format(
        job_name="runPhotoPipe_{}_{}.sh".format(tract, patch),
        sh_name=sh_name
        
    ))
    f_slurm.close()

KeyError: 'patches'

In [None]:
#We can now submit these after the processCcd has run with
#qsub ./slurm/patch*.slurm