# Estimate memory usage

In this notebook we estimate the total memory (on-disk size) required for various processing runs, by scaling up the sizes measured in the example data butler.

In [1]:
# What version of the Stack are we using?
# Each line filters the `eups list -s` output down to one package of interest.
! eups list -s | grep lsst_distrib
! eups list -s | grep obs_vista

lsst_distrib          g0b29ad24fb+cd38383676 	current w_2022_38 w_latest setup
obs_vista             23.0.0-1   	current setup


In [2]:
import os
from astropy.table import Table
import numpy as np
import json
from humanfriendly import format_size

## Memory per image/patch

Let's look at the example data butler. In the example butler we don't run the processing on all the ingested images, so we first need to count files to get per-file estimates.

In [3]:
# Count the raw VIRCAM .fit files present under the example butler's raw directory.
!ls data/VIRCAM/raw/all/raw/*/*/*.fit | wc -l

      48


In [4]:
!ls data/videoSingleFrame/20220216T105147Z/calexp/*/*/*/*/*.fits | wc -l

zsh:1: no matches found: data/videoSingleFrame/20220216T105147Z/calexp/*/*/*/*/*.fits
       0


In [5]:
# Disk usage of the example butler, two directory levels deep, in human-readable units.
# These totals are the basis for the per-file / per-patch sizes defined below.
!du -h -d 2 data

3.1G	data/videoSingleFrame/20220920T150232Z
3.1G	data/videoSingleFrame
 99M	data/video_gen3/ps1_pv3_3pi_20170110_vista
 99M	data/video_gen3
670M	data/videoMultiVisitLater/20220921T125623Z
670M	data/videoMultiVisitLater
1.1G	data/videoConfidence/confidence
1.1G	data/videoConfidence
2.8G	data/videoCoaddDetect/20220921T115801Z
302M	data/videoCoaddDetect/hscImports
3.1G	data/videoCoaddDetect
116K	data/VIRCAM/calib
3.3G	data/VIRCAM/raw
3.3G	data/VIRCAM
4.0K	data/skymaps/skyMap
4.0K	data/skymaps
 12G	data


In [6]:
# Per-unit sizes in bytes, each a directory total divided by a unit count.
# NOTE(review): the `du` listing in this notebook reports 3.1G for
# data/videoSingleFrame, 3.1G for data/videoCoaddDetect and 670M for
# data/videoMultiVisitLater, which differ from the totals used for proc, coadd
# and meas below -- confirm whether these figures come from an earlier run.

# Per 16-CCD FITS file: data/VIRCAM/raw plus data/videoConfidence, over 48 raw files.
raw = (3_300_000_000 + 1_100_000_000) / 48

# Per 16-CCD FITS file: data/videoSingleFrame.
# The divisor 35 is presumably the number of processed CCD images -- TODO confirm.
proc = 3_200_000_000 * 16 / 35

# Per patch per band: data/videoCoaddDetect, divided over 2*2 units.
coadd = 927_000_000 / (2 * 2)

# Per patch per band: data/videoMultiVisitLater, divided over 2*3 units.
meas = 3_100_000_000 / (2 * 3)

In [7]:
# Display the four per-unit sizes (raw, proc, coadd, meas) in binary units.
tuple(format_size(size, binary=True) for size in (raw, proc, coadd, meas))

('87.42 MiB', '1.36 GiB', '221.01 MiB', '492.73 MiB')

## HSC PDR2 Overlap processing

In [8]:
# List the overview CSV tables available in dmu1/data; specific dated files are read below.
!ls ../../dmu1/data

hsc_images_overview.csv
hsc_images_overview_20210315.csv
hsc_images_overview_20210315.csv.old
hsc_images_overview_20220526.csv
vhs_images_overview.csv
vhs_images_overview_20201102.csv
vhs_images_overview_20210409.csv
vhs_images_overview_temp_790000.csv
vhs_tiles_tracts_patches_overview_20201102.csv
video_images_overview.csv
video_images_overview_20200820.csv
video_images_overview_20210322.csv
video_images_overview_20210330.csv
video_images_overview_20210331.csv
video_tiles_tracts_patches_overview_20200820.csv
viking_images_overview_20201218.csv
viking_images_overview_20210406.csv
viking_tiles_tracts_patches_overview_20201218.csv


In [9]:
# Count the VHS images and the distinct tracts they cover.
vhs=Table.read('../../dmu1/data/vhs_images_overview_20210409.csv')
# Only files whose name ends in 'st.fit' are counted (presumably the stacked
# images -- TODO confirm). Iterating the 'file' column directly avoids
# building a Row object for every table row.
n_vhs=np.sum([f.endswith('st.fit') for f in vhs['file']])
vhs_tiles=Table.read('../../dmu1/data/vhs_tiles_tracts_patches_overview_20201102.csv')
# Each 'tract_patch_json' entry is a JSON object; its keys are gathered as the
# set of tracts. update() grows the set in place instead of copying the whole
# set on every iteration as union() does.
vhs_tracts=set()
for tile_json in vhs_tiles['tract_patch_json']:
    vhs_tracts.update(json.loads(tile_json).keys())
n_vhs_tracts=len(vhs_tracts)
"VHS: images: {}, tracts: {}".format(n_vhs,n_vhs_tracts)

'VHS: images: 204996, tracts: 8743'

In [10]:
# List the full-run slurm job dictionaries for VHS.
# NOTE(review): the numeric suffix in each file name is presumably the number of jobs -- confirm.
!ls -al ../dmu4_VHS/slurm/full*

-rw-r--r--  1 raphaelshirley  staff  3150451 21 Sep 14:24 ../dmu4_VHS/slurm/full_images_job_dict_16053.json
-rw-r--r--  1 raphaelshirley  staff   896453 21 Sep 15:23 ../dmu4_VHS/slurm/full_images_job_dict_4682.json
-rw-r--r--  1 raphaelshirley  staff  1382859 22 Apr  2021 ../dmu4_VHS/slurm/full_images_job_dict_7174.json
-rw-r--r--  1 raphaelshirley  staff  1567709 21 Sep 17:00 ../dmu4_VHS/slurm/full_images_job_dict_8157.json
-rw-r--r--  1 raphaelshirley  staff   265335 22 Apr  2021 ../dmu4_VHS/slurm/full_patch_job_dict_13164.json
-rw-r--r--  1 raphaelshirley  staff   310359 21 Sep 17:00 ../dmu4_VHS/slurm/full_patch_job_dict_15308.json


In [11]:
# Image and patch counts for the VHS + HSC PDR3 case.
# The values equal the numeric suffixes of full_images_job_dict_8157.json and
# full_patch_job_dict_15308.json in ../dmu4_VHS/slurm (presumably the job
# counts -- TODO confirm); they are hand-copied and must be updated on re-runs.
n_vhs_hsc3 = 8157
n_vhs_hsc3_patches = 15308

In [12]:
# Count the VIDEO images and the distinct tracts they cover.
vid=Table.read('../../dmu1/data/video_images_overview_20210331.csv')
# Only files whose name ends in 'st.fit' are counted (presumably the stacked
# images -- TODO confirm). Iterating the 'file' column directly avoids
# building a Row object for every table row.
n_vid=np.sum([f.endswith('st.fit') for f in vid['file']])
vid_tiles=Table.read('../../dmu1/data/video_tiles_tracts_patches_overview_20200820.csv')
# Each 'tract_patch_json' entry is a JSON object; its keys are gathered as the
# set of tracts. update() grows the set in place instead of copying the whole
# set on every iteration as union() does.
vid_tracts=set()
for tile_json in vid_tiles['tract_patch_json']:
    vid_tracts.update(json.loads(tile_json).keys())
n_vid_tracts=len(vid_tracts)
"VIDEO: images: {}, tracts: {}".format(n_vid,n_vid_tracts)

'VIDEO: images: 13476, tracts: 18'

In [13]:
# Count the VIKING images and the distinct tracts they cover.
vik=Table.read('../../dmu1/data/viking_images_overview_20210406.csv')
# Only files whose name ends in 'st.fit' are counted (presumably the stacked
# images -- TODO confirm). Iterating the 'file' column directly avoids
# building a Row object for every table row.
n_vik=np.sum([f.endswith('st.fit') for f in vik['file']])
vik_tiles=Table.read('../../dmu1/data/viking_tiles_tracts_patches_overview_20201218.csv')
# Each 'tract_patch_json' entry is a JSON object; its keys are gathered as the
# set of tracts. update() grows the set in place instead of copying the whole
# set on every iteration as union() does.
vik_tracts=set()
for tile_json in vik_tiles['tract_patch_json']:
    vik_tracts.update(json.loads(tile_json).keys())
n_vik_tracts=len(vik_tracts)
"VIKING: images: {}, tracts: {}".format(n_vik,n_vik_tracts)

'VIKING: images: 41615, tracts: 720'

In [14]:
# List the full-run slurm job dictionaries for VIKING.
# NOTE(review): the numeric suffix in each file name is presumably the number of jobs -- confirm.
!ls -al ../dmu4_VIKING/slurm/full*

-rw-r--r--  1 raphaelshirley  staff  3801991 22 Apr  2021 ../dmu4_VIKING/slurm/full_images_job_dict_17800.json
-rw-r--r--  1 raphaelshirley  staff  3948706 21 Sep 14:50 ../dmu4_VIKING/slurm/full_images_job_dict_18490.json
-rw-r--r--  1 raphaelshirley  staff   316245 22 Apr  2021 ../dmu4_VIKING/slurm/full_patch_job_dict_15557.json
-rw-r--r--  1 raphaelshirley  staff   415877 21 Sep 14:51 ../dmu4_VIKING/slurm/full_patch_job_dict_20205.json


In [15]:
# Image and patch counts for the VIKING + HSC PDR3 case.
# The values equal the numeric suffixes of full_images_job_dict_18490.json and
# full_patch_job_dict_20205.json in ../dmu4_VIKING/slurm (presumably the job
# counts -- TODO confirm); they are hand-copied and must be updated on re-runs.
n_vik_hsc3 = 18490
n_vik_hsc3_patches = 20205

In [16]:
def summary(n_images,n_patches,name,vista_bands,final_bands):
    """Print a size estimate for processing one survey area.

    Uses the notebook-level per-unit sizes ``raw``, ``proc``, ``coadd`` and
    ``meas`` (in bytes) and ``format_size`` for human-readable output.

    Parameters
    ----------
    n_images : number of input images; each contributes ``raw + proc`` bytes.
    n_patches : number of patches.
    name : label inserted into the report heading.
    vista_bands : multiplier applied to the per-patch ``coadd`` size.
    final_bands : multiplier applied to the per-patch ``meas`` size.

    Returns
    -------
    None. The report is written to standard output.
    """
    # The three contributions, in bytes.
    image_bytes = n_images * (raw + proc)
    coadd_bytes = n_patches * coadd * vista_bands
    phot_bytes = n_patches * meas * final_bands
    total_bytes = image_bytes + coadd_bytes + phot_bytes

    def pretty(n_bytes):
        # Render a byte count in binary units (KiB, MiB, ...).
        return format_size(n_bytes, binary=True)

    # The empty first and last entries reproduce the blank lines that frame the
    # report; the trailing space on each figure line is part of the output.
    report_lines = [
        "",
        "Full area {} processing".format(name),
        "Full processing of input images: {} ".format(pretty(image_bytes)),
        "Full coadd of all patches: {} ".format(pretty(coadd_bytes)),
        "Full photometry pipeline on all patches: {} ".format(pretty(phot_bytes)),
        "Total memory for {} * 16 CCD images and {} patches: {} ".format(
            n_images, n_patches, pretty(total_bytes)
        ),
        "",
    ]
    print("\n".join(report_lines))

In [17]:
# Full VHS area. The factor 81 is presumably the number of patches per tract,
# and 3 + 6 presumably 3 VISTA bands plus 6 optical bands -- TODO confirm.
summary(
    n_images=n_vhs,
    n_patches=n_vhs_tracts * 81,
    name='VHS',
    vista_bands=3,
    final_bands=3 + 6,
)


Full area VHS processing
Full processing of input images: 289.83 TiB 
Full coadd of all patches: 447.8 TiB 
Full photometry pipeline on all patches: 2.92 PiB 
Total memory for 204996 * 16 CCD images and 708183 patches: 3.65 PiB 



In [19]:
# VHS restricted to the HSC PDR3 case, using the hand-entered image and patch counts.
summary(
    n_images=n_vhs_hsc3,
    n_patches=n_vhs_hsc3_patches,
    name='VHS + HSC PDR3',
    vista_bands=3,
    final_bands=3 + 6,
)


Full area VHS + HSC PDR3 processing
Full processing of input images: 11.53 TiB 
Full coadd of all patches: 9.68 TiB 
Full photometry pipeline on all patches: 64.74 TiB 
Total memory for 8157 * 16 CCD images and 15308 patches: 85.95 TiB 



In [20]:
# Full VIDEO area. The factor 81 is presumably the number of patches per tract,
# and 5 + 6 presumably 5 VISTA bands plus 6 optical bands -- TODO confirm.
summary(
    n_images=n_vid,
    n_patches=n_vid_tracts * 81,
    name='VIDEO',
    vista_bands=5,
    final_bands=5 + 6,
)


Full area VIDEO processing
Full processing of input images: 19.05 TiB 
Full coadd of all patches: 1.54 TiB 
Full photometry pipeline on all patches: 7.54 TiB 
Total memory for 13476 * 16 CCD images and 1458 patches: 28.13 TiB 



In [21]:
# Full VIKING area. The factor 81 is presumably the number of patches per tract,
# and 5 + 6 presumably 5 VISTA bands plus 6 optical bands -- TODO confirm.
summary(
    n_images=n_vik,
    n_patches=n_vik_tracts * 81,
    name='VIKING',
    vista_bands=5,
    final_bands=5 + 6,
)


Full area VIKING processing
Full processing of input images: 58.84 TiB 
Full coadd of all patches: 61.46 TiB 
Full photometry pipeline on all patches: 301.45 TiB 
Total memory for 41615 * 16 CCD images and 58320 patches: 421.75 TiB 



In [22]:
# VIKING restricted to the HSC PDR3 case, using the hand-entered image and patch counts.
summary(
    n_images=n_vik_hsc3,
    n_patches=n_vik_hsc3_patches,
    name='VIKING + HSC PDR3',
    vista_bands=5,
    final_bands=5 + 6,
)


Full area VIKING + HSC PDR3 processing
Full processing of input images: 26.14 TiB 
Full coadd of all patches: 21.29 TiB 
Full photometry pipeline on all patches: 104.44 TiB 
Total memory for 18490 * 16 CCD images and 20205 patches: 151.87 TiB 



In [23]:
# Combined total in TiB of the "Total memory" figures printed above for
# VHS + HSC PDR3 (85.95), VIDEO (28.13) and VIKING + HSC PDR3 (151.87).
# The values are copied by hand from those outputs and must be updated if the
# inputs change.
85.95+28.13+151.87

265.95