# Estimate memory usages

In this notebook we estimate the total memory (disk storage) required for various processing runs.

In [1]:
# What version of the Stack are we using?
# Show the set-up (`eups list -s`) entries for lsst_distrib and obs_vista so the
# size estimates in this notebook can be tied to a specific software version.
! eups list -s | grep lsst_distrib
! eups list -s | grep obs_vista

lsst_distrib          g2d4714e03a+6e1aa0b536 	current w_2022_07 w_latest setup
obs_vista             23.0.0-1   	current setup


In [81]:
import os
from astropy.table import Table
import numpy as np
import json
from humanfriendly import format_size

## Memory per image/patch

Let's look at the example data butler.

In [16]:
# Count the raw .fit files under data/VIRCAM/raw in the example butler.
!ls data/VIRCAM/raw/all/raw/*/*/*.fit | wc -l

      48


In [26]:
# Count the calexp .fits files under the videoSingleFrame run directory.
!ls data/videoSingleFrame/20220216T105147Z/calexp/*/*/*/*/*.fits | wc -l

      35


In [118]:
# Disk usage of the example butler, two directory levels deep, in human-readable units.
!du -h -d 2 data

3.2G	data/videoSingleFrame/20220216T105147Z
3.2G	data/videoSingleFrame
 99M	data/video_gen3/ps1_pv3_3pi_20170110_vista
 99M	data/video_gen3
2.5M	data/videoMultiVisitLater/20220216T175637Z
1.1G	data/videoMultiVisitLater/20220216T175847Z
176M	data/videoMultiVisitLater/20220216T172039Z
1.3G	data/videoMultiVisitLater
1.1G	data/videoConfidence/confidence
1.1G	data/videoConfidence
927M	data/videoCoaddDetect/20220216T170915Z
928M	data/videoCoaddDetect
112K	data/VIRCAM/calib
3.3G	data/VIRCAM/raw
3.3G	data/VIRCAM
4.0K	data/skymaps/skyMap
4.0K	data/skymaps
 11G	data


In [129]:
# Byte totals transcribed by hand from the `du -h -d 2 data` listing, reading
# 1G as 1e9 bytes and 1M as 1e6 bytes.
# NOTE(review): `du -h` normally reports powers of 1024, so these decimal
# readings are approximate — confirm that is acceptable for the estimate.
raw_total_bytes = 3_300_000_000 + 1_100_000_000  # 3.3G matches data/VIRCAM/raw; the extra 1.1G is presumably data/videoConfidence — TODO confirm
n_raw_files = 48                                 # matches the raw .fit file count from `ls | wc -l`
single_frame_bytes = 3_200_000_000               # 3.2G matches data/videoSingleFrame
n_calexp_files = 35                              # matches the calexp .fits file count from `ls | wc -l`
ccds_per_file = 16                               # scale one calexp up to a 16 ccd fits file
coadd_total_bytes = 927_000_000                  # 927M matches data/videoCoaddDetect/20220216T170915Z
meas_total_bytes = 1_100_000_000                 # presumably the 1.1G data/videoMultiVisitLater run — TODO confirm

raw = raw_total_bytes / n_raw_files                          # per 16 ccd fits file
proc = single_frame_bytes * ccds_per_file / n_calexp_files   # per 16 ccd fits file
coadd = coadd_total_bytes / (2 * 2)                          # per patch per band (divisor presumably patches * bands — TODO confirm)
meas = meas_total_bytes / (2 * 3)                            # per patch per band (divisor presumably patches * bands — TODO confirm)

In [120]:
# Human-readable per-unit sizes, in the order raw, proc, coadd, meas.
tuple(format_size(size) for size in (raw, proc, coadd, meas))

('91.67 MB', '1.46 GB', '231.75 MB', '183.33 MB')

## HSC PDR2 Overlap processing

In [58]:
# List the files available in the dmu1 data directory.
!ls ../../dmu1/data

hsc_images_overview.csv
hsc_images_overview_20210315.csv
hsc_images_overview_20210315.csv.old
vhs_images_overview.csv
vhs_images_overview_20201102.csv
vhs_images_overview_20210409.csv
vhs_images_overview_temp_790000.csv
vhs_tiles_tracts_patches_overview_20201102.csv
video_images_overview.csv
video_images_overview_20200820.csv
video_images_overview_20210322.csv
video_images_overview_20210330.csv
video_images_overview_20210331.csv
video_tiles_tracts_patches_overview_20200820.csv
viking_images_overview_20201218.csv
viking_images_overview_20210406.csv
viking_tiles_tracts_patches_overview_20201218.csv


In [93]:
# Load the VHS image overview and count the files whose name ends in 'st.fit'.
vhs = Table.read('../../dmu1/data/vhs_images_overview_20210409.csv')
n_vhs = np.sum([file_name.endswith('st.fit') for file_name in vhs['file']])

# Gather the distinct keys found across every tile's tract_patch_json entry.
vhs_tiles = Table.read('../../dmu1/data/vhs_tiles_tracts_patches_overview_20201102.csv')
vhs_tracts = set()
for tract_patch_json in vhs_tiles['tract_patch_json']:
    vhs_tracts.update(json.loads(tract_patch_json).keys())
n_vhs_tracts = len(vhs_tracts)

"VHS: images: {}, tracts: {}".format(n_vhs, n_vhs_tracts)

'VHS: images: 204996, tracts: 8743'

In [94]:
# Load the VIDEO image overview and count the files whose name ends in 'st.fit'.
vid = Table.read('../../dmu1/data/video_images_overview_20210331.csv')
n_vid = np.sum([file_name.endswith('st.fit') for file_name in vid['file']])

# Gather the distinct keys found across every tile's tract_patch_json entry.
vid_tiles = Table.read('../../dmu1/data/video_tiles_tracts_patches_overview_20200820.csv')
vid_tracts = set()
for tract_patch_json in vid_tiles['tract_patch_json']:
    vid_tracts.update(json.loads(tract_patch_json).keys())
n_vid_tracts = len(vid_tracts)

"VIDEO: images: {}, tracts: {}".format(n_vid, n_vid_tracts)

'VIDEO: images: 13476, tracts: 18'

In [98]:
# Load the VIKING image overview and count the files whose name ends in 'st.fit'.
vik = Table.read('../../dmu1/data/viking_images_overview_20210406.csv')
n_vik = np.sum([file_name.endswith('st.fit') for file_name in vik['file']])

# Gather the distinct keys found across every tile's tract_patch_json entry.
vik_tiles = Table.read('../../dmu1/data/viking_tiles_tracts_patches_overview_20201218.csv')
vik_tracts = set()
for tract_patch_json in vik_tiles['tract_patch_json']:
    vik_tracts.update(json.loads(tract_patch_json).keys())
n_vik_tracts = len(vik_tracts)

"VIKING: images: {}, tracts: {}".format(n_vik, n_vik_tracts)

'VIKING: images: 41615, tracts: 720'

In [138]:
def summary(n_images,n_patches,name,vista_bands,final_bands):
    """Print a size estimate for processing a full survey area.

    The estimate combines the notebook-level per-unit sizes ``raw``, ``proc``,
    ``coadd`` and ``meas`` with the supplied counts, and renders every figure
    with ``format_size``.

    Parameters
    ----------
    n_images
        Number of input images; each one is charged ``raw + proc``.
    n_patches
        Number of patches.
    name
        Label inserted into the report heading.
    vista_bands
        Multiplier applied to the per-patch ``coadd`` size.
    final_bands
        Multiplier applied to the per-patch ``meas`` size.

    Returns
    -------
    None
        The report is written to standard output.
    """
    # Work out each contribution once so the per-stage lines and the total agree.
    image_size = n_images * (raw+proc)
    coadd_size = n_patches*coadd*vista_bands
    meas_size = n_patches*meas*final_bands
    total_size = image_size + coadd_size + meas_size

    report_lines = [
        "",  # the report opens with a blank line
        "Full area {} processing".format(name),
        "Full processing of input images: {} ".format(format_size(image_size)),
        "Full coadd of all patches: {} ".format(format_size(coadd_size)),
        "Full photometry pipeline on all patches: {} ".format(format_size(meas_size)),
        "Total memory for {} * 16 CCD images and {} patches: {} ".format(
            n_images, n_patches, format_size(total_size)
        ),
        "",  # closes the text with a newline before print adds its own
    ]
    print("\n".join(report_lines))

In [139]:
# VHS estimate; the patch count is taken as 81 per tract.
summary(
    n_images=n_vhs,
    n_patches=n_vhs_tracts*81,
    name='VHS',
    vista_bands=3,
    final_bands=3+6,
)


Full area VHS processing
Full processing of input images: 318.67 TB 
Full coadd of all patches: 492.36 TB 
Full photometry pipeline on all patches: 1.17 PB 
Total memory for 204996 * 16 CCD images and 708183 patches: 1.98 PB 



In [140]:
# VIDEO estimate; the patch count is taken as 81 per tract.
summary(
    n_images=n_vid,
    n_patches=n_vid_tracts*81,
    name='VIDEO',
    vista_bands=5,
    final_bands=5+6,
)


Full area VIDEO processing
Full processing of input images: 20.95 TB 
Full coadd of all patches: 1.69 TB 
Full photometry pipeline on all patches: 2.94 TB 
Total memory for 13476 * 16 CCD images and 1458 patches: 25.58 TB 



In [141]:
# VIKING estimate; the patch count is taken as 81 per tract.
summary(
    n_images=n_vik,
    n_patches=n_vik_tracts*81,
    name='VIKING',
    vista_bands=3,
    final_bands=3+6,
)


Full area VIKING processing
Full processing of input images: 64.69 TB 
Full coadd of all patches: 40.55 TB 
Full photometry pipeline on all patches: 96.23 TB 
Total memory for 41615 * 16 CCD images and 58320 patches: 201.47 TB 

