# 1 Data overview

In this notebook we will catalogue all the HSC and VISTA image information required for processing. The HSC image corners will be used to find the relevant VISTA images required for swarping to the HSC pixel base.

In [1]:
import glob

from astropy.table import Table, Column
from astropy.io import fits
from astropy.wcs import WCS

import matplotlib.pyplot as plt

import numpy as np
import time

import os

import hashlib
import yaml
import collections

In [2]:
# Date stamp (YYYYMMDD) used to tag the output files written by this notebook.
SUFFIX = time.strftime("%Y%m%d", time.localtime())
print(SUFFIX)

20201027


In [3]:

def data_loc(base=None):
    """Return the data locations configured for the machine in use.

    The locations are read from ``dmu0/data_local.yml`` under ``base`` when
    that file exists. Otherwise ``dmu0/data_local_template.yml`` is read and
    every ``BASE/`` placeholder in its values is replaced by ``base``.

    Parameters
    ----------
    base : str, optional
        Root directory of the repository. When omitted it is derived from the
        current working directory by cutting the path at the first occurrence
        of ``lsst-ir-fusion``.

    Returns
    -------
    dict
        The mapping loaded from the YAML file (data set name -> directory).
    """
    if base is None:
        cwd = os.getcwd()
        base = cwd.split('lsst-ir-fusion')[0] + 'lsst-ir-fusion/'
    # Guarantee a trailing separator so the string concatenations below (and
    # the 'BASE/' substitution) stay valid when the caller omits it.
    base = os.path.join(base, '')

    overide = base + 'dmu0/data_local.yml'
    standard = base + 'dmu0/data_local_template.yml'
    print(overide)
    # safe_load is used because yaml.load without an explicit Loader is
    # rejected by current PyYAML releases; the context managers make sure the
    # configuration files are closed again.
    if os.path.exists(overide):
        with open(overide, 'rb') as stream:
            locations = yaml.safe_load(stream)
    else:
        with open(standard, 'rb') as stream:
            locations = yaml.safe_load(stream)
        for k, v in locations.items():
            locations[k] = v.replace('BASE/', base)
    return locations


# NOTE: this rebinds the name `data_loc` from the function to the dict it
# returns; later cells index it as a dict.
data_loc = data_loc()

/Users/rs548/GitHub/lsst-ir-fusion/dmu0/data_local.yml




In [4]:
# Survey whose images are catalogued in the VISTA section; used as a key into
# data_loc and in the names of the output files.
SURVEY = 'vhs'

# Switches for the per-image processing loops.
RUN_HSC = False
RUN_VISTA = True
RUN_PANSTARRS = False

# When set, the VISTA table is reloaded from the latest checkpoint CSV.
RESTART_VISTA = True

In [5]:
# Display the resolved data locations (the dict produced by the data_loc() call above).
data_loc

{'vista': '/Users/rs548/GitHub/lsst-ir-fusion/dmu0/dmu0_VISTA/',
 'video': '/Users/rs548/GitHub/lsst-ir-fusion/dmu0/dmu0_VISTA/dmu0_VIDEO/data/',
 'viking': '/Users/rs548/GitHub/lsst-ir-fusion/dmu0/dmu0_VISTA/dmu0_VIKING/data/',
 'vhs': '/Users/rs548/GitHub/lsst-ir-fusion/dmu0/dmu0_VISTA/dmu0_VHS/data/',
 'hsc': '/Users/rs548/GitHub/lsst-ir-fusion/dmu0/dmu0_HSC/data/',
 'panstarrs': '/Users/rs548/GitHub/lsst-ir-fusion/dmu0/dmu0_PanSTARRS/data/',
 'gaia': '/Users/rs548/GitHub/lsst-ir-fusion/dmu0/dmu0_GAIA/data/'}

## 1.1 HSC images
First, let's look at the HSC images.

In [6]:
# Recursively collect every HSC calexp FITS file below the HSC data directory.
hsc_pattern = data_loc['hsc'] + "**/calexp*.fits"
images = glob.glob(hsc_pattern, recursive=True)

In [7]:
# Overview table for the HSC images, starting with a single 'file' column of paths.
file_column = Column(data=images, name="file")
meta = Table([file_column])

In [8]:
# Display the table of HSC image paths.
meta

file
str357
"/Users/rs548/GitHub/lsst-ir-fusion/dmu0/dmu0_HSC/data/hsc-release.mtk.nao.ac.jp/archive/filetree/pdr2_dud/deepCoadd-results/HSC-R/8524/3,5/calexp-HSC-R-8524-3,5.fits"
"/Users/rs548/GitHub/lsst-ir-fusion/dmu0/dmu0_HSC/data/hsc-release.mtk.nao.ac.jp/archive/filetree/pdr2_dud/deepCoadd-results/HSC-R/HSC-R/8524/3,5/calexp-HSC-R-8524-3,5.fits"
"/Users/rs548/GitHub/lsst-ir-fusion/dmu0/dmu0_HSC/data/hsc-release.mtk.nao.ac.jp/archive/filetree/pdr2_dud/deepCoadd-results/HSC-R/HSC-R/HSC-R/8524/3,5/calexp-HSC-R-8524-3,5.fits"
"/Users/rs548/GitHub/lsst-ir-fusion/dmu0/dmu0_HSC/data/hsc-release.mtk.nao.ac.jp/archive/filetree/pdr2_dud/deepCoadd-results/HSC-R/HSC-R/HSC-R/HSC-R/8524/3,5/calexp-HSC-R-8524-3,5.fits"
"/Users/rs548/GitHub/lsst-ir-fusion/dmu0/dmu0_HSC/data/hsc-release.mtk.nao.ac.jp/archive/filetree/pdr2_dud/deepCoadd-results/HSC-R/HSC-R/HSC-R/HSC-R/HSC-R/8524/3,5/calexp-HSC-R-8524-3,5.fits"
"/Users/rs548/GitHub/lsst-ir-fusion/dmu0/dmu0_HSC/data/hsc-release.mtk.nao.ac.jp/archive/filetree/pdr2_dud/deepCoadd-results/HSC-R/HSC-R/HSC-R/HSC-R/HSC-R/HSC-R/8524/3,5/calexp-HSC-R-8524-3,5.fits"
"/Users/rs548/GitHub/lsst-ir-fusion/dmu0/dmu0_HSC/data/hsc-release.mtk.nao.ac.jp/archive/filetree/pdr2_dud/deepCoadd-results/HSC-R/HSC-R/HSC-R/HSC-R/HSC-R/HSC-R/HSC-R/8524/3,5/calexp-HSC-R-8524-3,5.fits"
"/Users/rs548/GitHub/lsst-ir-fusion/dmu0/dmu0_HSC/data/hsc-release.mtk.nao.ac.jp/archive/filetree/pdr2_dud/deepCoadd-results/HSC-R/HSC-R/HSC-R/HSC-R/HSC-R/HSC-R/HSC-R/HSC-R/8524/3,5/calexp-HSC-R-8524-3,5.fits"
"/Users/rs548/GitHub/lsst-ir-fusion/dmu0/dmu0_HSC/data/hsc-release.mtk.nao.ac.jp/archive/filetree/pdr2_dud/deepCoadd-results/HSC-R/HSC-R/HSC-R/HSC-R/HSC-R/HSC-R/HSC-R/HSC-R/HSC-R/8524/3,5/calexp-HSC-R-8524-3,5.fits"
"/Users/rs548/GitHub/lsst-ir-fusion/dmu0/dmu0_HSC/data/hsc-release.mtk.nao.ac.jp/archive/filetree/pdr2_dud/deepCoadd-results/HSC-R/HSC-R/HSC-R/HSC-R/HSC-R/HSC-R/HSC-R/HSC-R/HSC-R/HSC-R/8524/3,5/calexp-HSC-R-8524-3,5.fits"


#### Critical information
From each image we need:

1. Corner pixels
2. File sizes
3. Checksums

In [9]:
def file_as_bytes(file):
    """Read an already opened file to the end and close it.

    Parameters
    ----------
    file : file object
        Open file-like object that supports the context-manager protocol.

    Returns
    -------
    The complete content returned by ``file.read()``.
    """
    with file:
        payload = file.read()
    return payload

# MD5 digest of the first HSC image; its length is reused later to size the
# 'hash' columns.
first_image = open(meta[0]['file'], 'rb')
test_hash = hashlib.md5(file_as_bytes(first_image)).hexdigest()
print(test_hash)

1f85857f6fd6a9389ab8f7f09d8cb101


In [10]:
%%timeit
# Time one full read + MD5 digest of the first HSC image to gauge the cost of hashing every file.
h = hashlib.md5(file_as_bytes(open(meta[0]['file'], 'rb'))).hexdigest()

265 ms ± 1.6 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [11]:
# Back-of-envelope run time: 150000 images at 0.5 s each, converted from
# seconds to hours.
print('{} hours'.format(round((150000 * 0.5) / (60 * 60))))

21 hours


### 1.1.2 Run on all images
Now let's loop over all the images.

In [12]:
# Corner coordinates start out as NaN until the image has been processed.
for corner_col in ('ra_0_0', 'ra_0_y', 'ra_x_0', 'ra_x_y',
                   'dec_0_0', 'dec_0_y', 'dec_x_0', 'dec_x_y'):
    meta.add_column(Column(data=np.full(len(meta), np.nan), name=corner_col))

# File size defaults to 0; the hash is a byte string as wide as an MD5 hex digest.
meta.add_column(Column(data=np.full(len(meta), 0), name='size'))
hash_dtype = 'S{}'.format(len(test_hash))
meta.add_column(Column(data=np.full(len(meta), ''), name='hash', dtype=hash_dtype))

In [13]:

# Fill in corner coordinates, file size and MD5 checksum for every HSC image.
# The whole loop is skipped unless RUN_HSC is set, because it opens and hashes
# every file.
if RUN_HSC:
    for im in meta:
        file = im["file"]
        # The context manager closes each FITS file once its header has been
        # used; previously one handle per image was left open.
        with fits.open(file) as hdu:
            header = hdu[1].header
            wcs = WCS(header)
            nx, ny = header['NAXIS1'], header['NAXIS2']
            # Sky coordinates of the four corners, ordered
            # (0, 0), (0, y), (x, 0), (x, y); the trailing 1 is the pixel origin.
            # NOTE(review): pixel values 0 and NAXIS are passed with origin=1 --
            # confirm these are the intended corner positions.
            ra, dec = wcs.all_pix2world([0, 0, nx, nx], [0, ny, 0, ny], 1)
        im['ra_0_0'] = ra[0]
        im['ra_0_y'] = ra[1]
        im['ra_x_0'] = ra[2]
        im['ra_x_y'] = ra[3]
        im['dec_0_0'] = dec[0]
        im['dec_0_y'] = dec[1]
        im['dec_x_0'] = dec[2]
        im['dec_x_y'] = dec[3]
        im['size'] = os.stat(file).st_size
        # file_as_bytes closes the handle it is given.
        im['hash'] = hashlib.md5(file_as_bytes(open(file, 'rb'))).hexdigest()

In [14]:
# Preview the first ten rows of the HSC overview table.
meta[:10].show_in_notebook()

idx,file,ra_0_0,ra_0_y,ra_x_0,ra_x_y,dec_0_0,dec_0_y,dec_x_0,dec_x_y,size,hash
0,"/Users/rs548/GitHub/lsst-ir-fusion/dmu0/dmu0_HSC/data/hsc-release.mtk.nao.ac.jp/archive/filetree/pdr2_dud/deepCoadd-results/HSC-R/8524/3,5/calexp-HSC-R-8524-3,5.fits",,,,,,,,,0,
1,"/Users/rs548/GitHub/lsst-ir-fusion/dmu0/dmu0_HSC/data/hsc-release.mtk.nao.ac.jp/archive/filetree/pdr2_dud/deepCoadd-results/HSC-R/HSC-R/8524/3,5/calexp-HSC-R-8524-3,5.fits",,,,,,,,,0,
2,"/Users/rs548/GitHub/lsst-ir-fusion/dmu0/dmu0_HSC/data/hsc-release.mtk.nao.ac.jp/archive/filetree/pdr2_dud/deepCoadd-results/HSC-R/HSC-R/HSC-R/8524/3,5/calexp-HSC-R-8524-3,5.fits",,,,,,,,,0,
3,"/Users/rs548/GitHub/lsst-ir-fusion/dmu0/dmu0_HSC/data/hsc-release.mtk.nao.ac.jp/archive/filetree/pdr2_dud/deepCoadd-results/HSC-R/HSC-R/HSC-R/HSC-R/8524/3,5/calexp-HSC-R-8524-3,5.fits",,,,,,,,,0,
4,"/Users/rs548/GitHub/lsst-ir-fusion/dmu0/dmu0_HSC/data/hsc-release.mtk.nao.ac.jp/archive/filetree/pdr2_dud/deepCoadd-results/HSC-R/HSC-R/HSC-R/HSC-R/HSC-R/8524/3,5/calexp-HSC-R-8524-3,5.fits",,,,,,,,,0,
5,"/Users/rs548/GitHub/lsst-ir-fusion/dmu0/dmu0_HSC/data/hsc-release.mtk.nao.ac.jp/archive/filetree/pdr2_dud/deepCoadd-results/HSC-R/HSC-R/HSC-R/HSC-R/HSC-R/HSC-R/8524/3,5/calexp-HSC-R-8524-3,5.fits",,,,,,,,,0,
6,"/Users/rs548/GitHub/lsst-ir-fusion/dmu0/dmu0_HSC/data/hsc-release.mtk.nao.ac.jp/archive/filetree/pdr2_dud/deepCoadd-results/HSC-R/HSC-R/HSC-R/HSC-R/HSC-R/HSC-R/HSC-R/8524/3,5/calexp-HSC-R-8524-3,5.fits",,,,,,,,,0,
7,"/Users/rs548/GitHub/lsst-ir-fusion/dmu0/dmu0_HSC/data/hsc-release.mtk.nao.ac.jp/archive/filetree/pdr2_dud/deepCoadd-results/HSC-R/HSC-R/HSC-R/HSC-R/HSC-R/HSC-R/HSC-R/HSC-R/8524/3,5/calexp-HSC-R-8524-3,5.fits",,,,,,,,,0,
8,"/Users/rs548/GitHub/lsst-ir-fusion/dmu0/dmu0_HSC/data/hsc-release.mtk.nao.ac.jp/archive/filetree/pdr2_dud/deepCoadd-results/HSC-R/HSC-R/HSC-R/HSC-R/HSC-R/HSC-R/HSC-R/HSC-R/HSC-R/8524/3,5/calexp-HSC-R-8524-3,5.fits",,,,,,,,,0,
9,"/Users/rs548/GitHub/lsst-ir-fusion/dmu0/dmu0_HSC/data/hsc-release.mtk.nao.ac.jp/archive/filetree/pdr2_dud/deepCoadd-results/HSC-R/HSC-R/HSC-R/HSC-R/HSC-R/HSC-R/HSC-R/HSC-R/HSC-R/HSC-R/8524/3,5/calexp-HSC-R-8524-3,5.fits",,,,,,,,,0,


In [15]:
# Only write the HSC overview when the HSC loop was actually run.
if RUN_HSC:
    hsc_overview_path = './data/hsc_images_overview_{}.csv'.format(SUFFIX)
    meta.write(hsc_overview_path, overwrite=True)

## 1.2 VISTA images

Now let's investigate the VISTA images.

In [16]:
# Recursively collect every .fit file below the data directory of the chosen survey.
vista_pattern = data_loc[SURVEY] + "/**/*.fit"
v_files = glob.glob(vista_pattern, recursive=True)

In [17]:
# Overview table for the VISTA images, starting with a single 'file' column of paths.
v_file_column = Column(data=v_files, name="file")
v_meta = Table([v_file_column])

In [18]:
# Pointing and corner coordinates start out as NaN until the image has been processed.
for float_col in ('ra', 'dec',
                  'ra_0_0', 'ra_0_y', 'ra_x_0', 'ra_x_y',
                  'dec_0_0', 'dec_0_y', 'dec_x_0', 'dec_x_y'):
    v_meta.add_column(Column(data=np.full(len(v_meta), np.nan), name=float_col))

# Filter name is stored in a fixed-width string column (eight characters).
v_meta.add_column(Column(data=np.full(len(v_meta), '        '), name='filter'))

# Integer columns default to 0; size == 0 is what marks a row as unprocessed.
# NOTE(review): 'exptime' is created as an integer column -- confirm the
# EXPTIME header values are never fractional.
for int_col in ('size', 'visit', 'exptime'):
    v_meta.add_column(Column(data=np.full(len(v_meta), 0), name=int_col))

# Byte-string column as wide as an MD5 hex digest.
v_hash_dtype = 'S{}'.format(len(test_hash))
v_meta.add_column(Column(data=np.full(len(v_meta), ''), name='hash', dtype=v_hash_dtype))

In [19]:
# Number of images handled so far; also used to name the checkpoint files.
n_im = 0
if RESTART_VISTA:
    checkpoints = glob.glob('./data/{}_images_overview_temp_*.csv'.format(SURVEY))
    if checkpoints:
        # Checkpoint names end in '_<count>.csv'; resume from the largest count.
        n_im = max(int(t.split('_')[-1][:-4]) for t in checkpoints)
        restart_file = './data/{}_images_overview_temp_{}.csv'.format(SURVEY, n_im)
        print("Restarting with: {}".format(restart_file))
        v_meta = Table.read(restart_file)
    else:
        # Without this guard the maximum of an empty list raised a ValueError
        # on a first run that has no checkpoint yet.
        print("No checkpoint found for {}; starting from the beginning.".format(SURVEY))

In [20]:

# Fill in pointing, corner coordinates, filter, size, checksum, visit and
# exposure time for every VISTA image that has not been processed yet
# (size == 0). The whole loop is skipped unless RUN_VISTA is set.
#
# Files that raise an error are recorded here instead of being silently
# ignored; values assigned before the error are kept.
vista_failures = []
if RUN_VISTA:
    # Slicing with a mask yields a copy, so results are written back to v_meta
    # through a per-file mask rather than through the row object.
    for im in v_meta[v_meta['size'] == 0]:
        file = im["file"]
        n_im += 1
        try:
            # The context manager closes each FITS file once it has been
            # processed; previously one handle per image was left open.
            with fits.open(file) as hdu:
                image_header = hdu[1].header
                wcs = WCS(image_header)
                nx, ny = image_header['NAXIS1'], image_header['NAXIS2']
                # Sky coordinates of the four corners, ordered
                # (0, 0), (0, y), (x, 0), (x, y); the trailing 1 is the pixel origin.
                ra, dec = wcs.all_pix2world([0, 0, nx, nx], [0, ny, 0, ny], 1)
                this_file = v_meta['file'] == file
                # Files ending in '_tl.fit' take pointing and filter from
                # extension 1, all other files from the primary header.
                if file.endswith('_tl.fit'):
                    pointing_header = image_header
                else:
                    pointing_header = hdu[0].header
                v_meta['ra'][this_file] = pointing_header['RA']
                v_meta['dec'][this_file] = pointing_header['DEC']
                v_meta['filter'][this_file] = pointing_header['HIERARCH ESO INS FILT1 NAME']
                v_meta['ra_0_0'][this_file] = ra[0]
                v_meta['ra_0_y'][this_file] = ra[1]
                v_meta['ra_x_0'][this_file] = ra[2]
                v_meta['ra_x_y'][this_file] = ra[3]
                v_meta['dec_0_0'][this_file] = dec[0]
                v_meta['dec_0_y'][this_file] = dec[1]
                v_meta['dec_x_0'][this_file] = dec[2]
                v_meta['dec_x_y'][this_file] = dec[3]
                v_meta['size'][this_file] = os.stat(file).st_size
                # file_as_bytes closes the handle it is given.
                v_meta['hash'][this_file] = hashlib.md5(file_as_bytes(open(file, 'rb'))).hexdigest()

                v_meta['visit'][this_file] = image_header['ESO DET EXP NO']
                v_meta['exptime'][this_file] = image_header['EXPTIME']
        except Exception as err:
            # Best effort: keep going, but remember what failed. Catching
            # Exception (not a bare except) lets Ctrl-C interrupt the loop.
            vista_failures.append((file, repr(err)))
        # Periodic checkpoint so that a long run can be resumed.
        if n_im % 10000 == 0:
            v_meta.write('./data/{}_images_overview_temp_{}.csv'.format(SURVEY, n_im), overwrite=True)
    print("{} VISTA files raised an error (see vista_failures)".format(len(vista_failures)))
        
    

 [astropy.io.fits.verify]
the RADECSYS keyword is deprecated, use RADESYSa. [astropy.wcs.wcs]
the PROJPn keyword is deprecated, use PVi_ma. [astropy.wcs.wcs]
the PROJPn keyword is deprecated, use PVi_ma. [astropy.wcs.wcs]
the PROJPn keyword is deprecated, use PVi_ma. [astropy.wcs.wcs]
the RADECSYS keyword is deprecated, use RADESYSa. [astropy.wcs.wcs]


In [21]:
# Inspect the extension-1 header of `hdu`, i.e. whichever FITS file an earlier cell opened last.
hdu[1].header

SIMPLE  =                    T                                                  
BITPIX  =                   32 / data type of original image                    
NAXIS   =                    2 / dimension of original image                    
NAXIS1  =                12770 / length of original image axis                  
NAXIS2  =                15655 / length of original image axis                  
PCOUNT  =                    0 / size of special data area                      
GCOUNT  =                    1 / one data group (required keyword)              
XTENSION= 'BINTABLE'           / binary table extension                         
ORIGIN  = 'ESO     '                    / European Southern Observatory         
DATE    = '2014-09-25T08:56:52'         / Date this file was written            
TELESCOP= 'VISTA   '                    / ESO Telescope Name                    
INSTRUME= 'VIRCAM  '                    / Instrument used.                      
OBJECT  = 'Str08   '        

In [22]:
# Preview the first ten rows of the VISTA overview table.
v_meta[:10].show_in_notebook()

idx,file,ra,dec,ra_0_0,ra_0_y,ra_x_0,ra_x_y,dec_0_0,dec_0_y,dec_x_0,dec_x_y,filter,size,visit,exptime,hash
0,/Users/rs548/GitHub/lsst-ir-fusion/dmu0/dmu0_VISTA/dmu0_VHS/data/20140924/v20140924_00570.fit,89.512887,-11.03727,90.02819265056802,90.02976674538284,89.83309207307911,89.83407354098694,-10.393808247352707,-10.58515438556661,-10.392783819141997,-10.584457609559358,Ks,89910720,35908,15,a3044ea57f687a604b432327b986a518
1,/Users/rs548/GitHub/lsst-ir-fusion/dmu0/dmu0_VISTA/dmu0_VHS/data/20140924/v20140924_00570_st.fit,89.512887,-11.03727,90.0271649526715,90.02921657901464,89.8263788882918,89.82780573083764,-10.392653912963034,-10.58976634694837,-10.392037237373728,-10.589497860740982,Ks,77172480,35908,7,193758af484f2a1c08bcdfe430fb1056
2,/Users/rs548/GitHub/lsst-ir-fusion/dmu0/dmu0_VISTA/dmu0_VHS/data/20140924/v20140924_00570_st_conf.fit,89.512887,-11.03727,90.02716363625248,90.0292152149582,89.82637849010239,89.82780524922262,-10.392649058245173,-10.589772372067468,-10.392036716633294,-10.589508269447236,Ks,29689920,35908,7,d2bd123c65c0f239a3198d7196154994
3,/Users/rs548/GitHub/lsst-ir-fusion/dmu0/dmu0_VISTA/dmu0_VHS/data/20140924/v20140924_00570_st_tl.fit,89.512887,-11.03727,90.21551333787347,90.22139615308888,88.98530693274688,88.98500806710288,-10.20664505842024,-11.690872515139066,-10.208891509475812,-11.693130174054868,Ks,195995520,0,0,1c88dd1589d0c02a144c3434a4fad5f6


In [23]:
# Only write the final VISTA overview when the VISTA loop was actually run.
if RUN_VISTA:
    vista_overview_path = './data/{}_images_overview_{}.csv'.format(SURVEY, SUFFIX)
    v_meta.write(vista_overview_path, overwrite=True)