# 1 Data overview

In this notebook we will catalogue all the HSC and VISTA image information required for processing. The HSC image corners will be used to find the relevant VISTA images required for swarping to the HSC pixel base.

In [1]:
import glob

from astropy.table import Table, Column
from astropy.io import fits
from astropy.wcs import WCS

import matplotlib.pyplot as plt

import numpy as np
import time

import os

import hashlib
import yaml
import collections.abc

In [2]:
# Date stamp (YYYYMMDD, local time) appended to the final output file names.
SUFFIX = time.strftime("%Y%m%d", time.localtime())
print(SUFFIX)

20200820


In [3]:

def data_loc(base=None):
    """Return data locations depending on machine used

    A machine-specific ``dmu0/data_local.yml`` below ``base`` is used when it
    exists. Otherwise ``dmu0/data_local_template.yml`` is read and every
    occurrence of ``'BASE/'`` in its values is replaced by ``base``.

    Parameters
    ----------
    base : str, optional
        Root of the repository, including a trailing slash. When omitted it
        is built from the current working directory: everything before the
        first ``'lsst-ir-fusion'`` plus ``'lsst-ir-fusion/'``.

    Returns
    -------
    dict
        The mapping loaded from the YAML file (data set name -> path).
    """
    if base is None:
        cwd = os.getcwd()
        base = cwd.split('lsst-ir-fusion')[0] + 'lsst-ir-fusion/'

    overide = base + 'dmu0/data_local.yml'
    standard = base + 'dmu0/data_local_template.yml'
    print(overide)
    # safe_load only builds plain Python objects; yaml.load() without an
    # explicit Loader is deprecated and rejected by recent PyYAML releases.
    if os.path.exists(overide):
        with open(overide, 'rb') as config_file:
            locations = yaml.safe_load(config_file)
    else:
        with open(standard, 'rb') as config_file:
            locations = yaml.safe_load(config_file)
        # The template stores paths relative to the placeholder 'BASE/'.
        for k, v in locations.items():
            locations[k] = v.replace('BASE/', base)
    return locations
# Call the helper once and keep its result. Note that this rebinds the name
# `data_loc` from the function to the value it returned.
data_loc = data_loc()

/Users/rs548/GitHub/lsst-ir-fusion/dmu0/data_local.yml




In [4]:
# Run switches for the cells below.
RUN_HSC = False  # when False the HSC image loop and the HSC overview write are skipped
RUN_VISTA = True  # when False the VISTA image loop and the VISTA overview write are skipped
RESTART_VISTA = False  # when True, resume from the highest-numbered temporary checkpoint CSV
SURVEY = 'video'  # key into data_loc selecting the VISTA data directory; also used in output file names
RUN_PANSTARRS = False  # not referenced by any cell visible in this section

In [5]:
# Display the resolved data locations.
data_loc

{'vista': '/Users/rs548/GitHub/lsst-ir-fusion/dmu0/dmu0_VISTA/',
 'video': '/Users/rs548/GitHub/lsst-ir-fusion/dmu0/dmu0_VISTA/dmu0_VIDEO/data/',
 'viking': '/Users/rs548/GitHub/lsst-ir-fusion/dmu0/dmu0_VISTA/dmu0_VIKING/data/',
 'vhs': '/Users/rs548/GitHub/lsst-ir-fusion/dmu0/dmu0_VISTA/dmu0_VHS/data/',
 'hsc': '/Users/rs548/GitHub/lsst-ir-fusion/dmu0/dmu0_HSC/data/',
 'panstarrs': '/Users/rs548/GitHub/lsst-ir-fusion/dmu0/dmu0_PanSTARRS/data/',
 'gaia': '/Users/rs548/GitHub/lsst-ir-fusion/dmu0/dmu0_GAIA/data/'}

## 1.1 HSC images
First, let's look at the HSC images.

In [6]:
# Recursively collect every calexp FITS file below the HSC data directory.
hsc_pattern = data_loc['hsc'] + "**/calexp*.fits"
images = glob.glob(hsc_pattern, recursive=True)

In [7]:
# Start the HSC metadata table with a single column holding the image paths.
file_column = Column(data=images, name="file")
meta = Table()
meta.add_column(file_column)

In [8]:
# Display the table; at this point it only has the 'file' column.
meta

file
str165
"/Users/rs548/GitHub/lsst-ir-fusion/dmu0/dmu0_HSC/data/hsc-release.mtk.nao.ac.jp/archive/filetree/pdr2_dud/deepCoadd-results/HSC-R/8524/3,5/calexp-HSC-R-8524-3,5.fits"


#### Critical information
From each image we need:

1. Corner pixels
2. File sizes
3. Checksums

In [9]:
def file_as_bytes(file):
    """Read an already opened file object to the end and close it.

    Parameters
    ----------
    file : file object
        Open file (or other context manager with a ``read`` method).

    Returns
    -------
    The result of ``read()`` on the object (bytes for files opened in
    binary mode).
    """
    with file as handle:
        contents = handle.read()
    return contents

# MD5 of the first catalogued image; its length is used later to size the
# 'hash' columns.
with open(meta[0]['file'], 'rb') as first_image:
    test_hash = hashlib.md5(first_image.read()).hexdigest()
print(test_hash)

1f85857f6fd6a9389ab8f7f09d8cb101


In [10]:
%%timeit
# Timed statement: read the first image completely and compute its MD5 hex digest.
h = hashlib.md5(file_as_bytes(open(meta[0]['file'], 'rb'))).hexdigest()

253 ms ± 5.83 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [11]:
# Rough total hashing time in hours: 150000 * 0.5 divided by the seconds in
# an hour (presumably ~150000 images at ~0.5 s each -- confirm the inputs).
estimated_hours = round((150000 * 0.5) / (60 * 60))
print(str(estimated_hours) + ' hours')

21 hours


### 1.2.2 Run on all images
Now let's loop over all the images.

In [12]:
# Pre-allocate the per-image columns: NaN for the eight corner coordinates,
# 0 for the file size and an empty byte string for the checksum.
n_rows = len(meta)
for corner_name in ('ra_0_0', 'ra_0_y', 'ra_x_0', 'ra_x_y',
                    'dec_0_0', 'dec_0_y', 'dec_x_0', 'dec_x_y'):
    meta.add_column(Column(data=np.full(n_rows, np.nan), name=corner_name))
meta.add_column(Column(data=np.full(n_rows, 0), name='size'))
# The checksum column is as wide as the hex digest computed for the test image.
hash_dtype = 'S{}'.format(len(test_hash))
meta.add_column(Column(data=np.full(n_rows, ''), name='hash', dtype=hash_dtype))

In [13]:

# Fill in corner coordinates, file size and MD5 checksum for every HSC image.
# Nothing is done unless RUN_HSC is set.
if RUN_HSC:
    for im in meta:
        file = im["file"]
        # Use a context manager so each FITS file is closed as soon as its
        # header has been used, instead of leaving one open handle per image.
        with fits.open(file) as hdu:
            header = hdu[1].header
            wcs = WCS(header)
            nx, ny = header['NAXIS1'], header['NAXIS2']
            # Sky positions of the four corners, in the order
            # (0, 0), (0, y), (x, 0), (x, y).
            ra, dec = wcs.all_pix2world([0, 0, nx, nx], [0, ny, 0, ny], 1)
        im['ra_0_0'] = ra[0]
        im['ra_0_y'] = ra[1]
        im['ra_x_0'] = ra[2]
        im['ra_x_y'] = ra[3]
        im['dec_0_0'] = dec[0]
        im['dec_0_y'] = dec[1]
        im['dec_x_0'] = dec[2]
        im['dec_x_y'] = dec[3]
        im['size'] = os.stat(file).st_size
        im['hash'] = hashlib.md5(file_as_bytes(open(file, 'rb'))).hexdigest()

In [14]:
# Preview the first ten rows of the HSC table.
meta[:10].show_in_notebook()

idx,file,ra_0_0,ra_0_y,ra_x_0,ra_x_y,dec_0_0,dec_0_y,dec_x_0,dec_x_y,size,hash
0,"/Users/rs548/GitHub/lsst-ir-fusion/dmu0/dmu0_HSC/data/hsc-release.mtk.nao.ac.jp/archive/filetree/pdr2_dud/deepCoadd-results/HSC-R/8524/3,5/calexp-HSC-R-8524-3,5.fits",,,,,,,,,0,


In [15]:
# Save the HSC overview only when the HSC loop was actually run.
if RUN_HSC:
    hsc_overview_path = './data/hsc_images_overview_{}.csv'.format(SUFFIX)
    meta.write(hsc_overview_path, overwrite=True)

## 1.2 VISTA images

Now let's investigate the VISTA images.

In [16]:
# Recursively collect every .fit file below the selected survey's data directory.
vista_pattern = data_loc[SURVEY] + "/**/*.fit"
v_files = glob.glob(vista_pattern, recursive=True)

In [17]:
# Start the VISTA metadata table with a single column holding the image paths.
v_file_column = Column(data=v_files, name="file")
v_meta = Table()
v_meta.add_column(v_file_column)

In [18]:
# Pre-allocate the per-image columns: NaN for the pointing and the eight
# corner coordinates, an eight-space string for the filter name, 0 for the
# file size and an empty byte string for the checksum.
n_vista_rows = len(v_meta)
for float_name in ('ra', 'dec',
                   'ra_0_0', 'ra_0_y', 'ra_x_0', 'ra_x_y',
                   'dec_0_0', 'dec_0_y', 'dec_x_0', 'dec_x_y'):
    v_meta.add_column(Column(data=np.full(n_vista_rows, np.nan), name=float_name))
v_meta.add_column(Column(data=np.full(n_vista_rows, '        '), name='filter'))
v_meta.add_column(Column(data=np.full(n_vista_rows, 0), name='size'))
# The checksum column is as wide as the hex digest computed for the test image.
vista_hash_dtype = 'S{}'.format(len(test_hash))
v_meta.add_column(Column(data=np.full(n_vista_rows, ''), name='hash', dtype=vista_hash_dtype))

In [19]:
# Counter of VISTA images handled so far; it also names the checkpoint files.
n_im = 0
if RESTART_VISTA:
    checkpoints = glob.glob('./data/{}_images_overview_temp_*.csv'.format(SURVEY))
    if checkpoints:
        # Checkpoint names end in '_<count>.csv'; resume from the largest count.
        n_im = max(int(t.split('_')[-1][:-4]) for t in checkpoints)
        restart_file = './data/{}_images_overview_temp_{}.csv'.format(SURVEY, n_im)
        print("Restarting with: {}".format(restart_file))
        v_meta = Table.read(restart_file)
    else:
        # Without this guard, taking the maximum of an empty list raises
        # ValueError; fall back to processing the freshly built table.
        print("No checkpoint found for {}; starting from the beginning".format(SURVEY))

In [24]:

# Catalogue every VISTA image that has not been processed yet (size still 0).
# Nothing is done unless RUN_VISTA is set.
if RUN_VISTA:
    # Row indices are taken once, up front, and used to write straight into
    # v_meta. This avoids building a full-table boolean mask for every image.
    pending_rows = np.flatnonzero(v_meta['size'] == 0)
    for row in pending_rows:
        file = v_meta['file'][row]
        n_im += 1
        try:
            # The context manager closes each FITS file once its headers
            # have been read.
            with fits.open(file) as hdu:
                image_header = hdu[1].header
                wcs = WCS(image_header)
                nx, ny = image_header['NAXIS1'], image_header['NAXIS2']
                # Sky positions of the four corners, in the order
                # (0, 0), (0, y), (x, 0), (x, y).
                ra, dec = wcs.all_pix2world([0, 0, nx, nx], [0, ny, 0, ny], 1)
                # Files ending in '_tl.fit' take pointing and filter from
                # extension 1; all others take them from the primary header.
                if file.endswith('_tl.fit'):
                    info_header = hdu[1].header
                else:
                    info_header = hdu[0].header
                v_meta['ra'][row] = info_header['RA']
                v_meta['dec'][row] = info_header['DEC']
                v_meta['filter'][row] = info_header['HIERARCH ESO INS FILT1 NAME']
            v_meta['ra_0_0'][row] = ra[0]
            v_meta['ra_0_y'][row] = ra[1]
            v_meta['ra_x_0'][row] = ra[2]
            v_meta['ra_x_y'][row] = ra[3]
            v_meta['dec_0_0'][row] = dec[0]
            v_meta['dec_0_y'][row] = dec[1]
            v_meta['dec_x_0'][row] = dec[2]
            v_meta['dec_x_y'][row] = dec[3]
            v_meta['size'][row] = os.stat(file).st_size
            v_meta['hash'][row] = hashlib.md5(file_as_bytes(open(file, 'rb'))).hexdigest()
        except Exception as err:
            # Keep going on a bad file, but say so: the row keeps size 0 and
            # will be retried on a restart. KeyboardInterrupt is not caught,
            # so the loop can still be stopped by hand.
            print("Could not process {}: {}".format(file, err))
        # Periodic checkpoint so a long run can be resumed.
        if n_im % 10000 == 0:
            v_meta.write('./data/{}_images_overview_temp_{}.csv'.format(SURVEY,n_im), overwrite=True)
        
    

In [25]:
# Preview the first ten rows of the VISTA table.
v_meta[:10].show_in_notebook()

idx,file,ra,dec,ra_0_0,ra_0_y,ra_x_0,ra_x_y,dec_0_0,dec_0_y,dec_x_0,dec_x_y,filter,size,hash
0,/Users/rs548/GitHub/lsst-ir-fusion/dmu0/dmu0_VISTA/dmu0_VIDEO/data/20121122/v20121122_00102_st.fit,35.616442,-4.72313,36.12365296903962,36.12552008187017,35.92586958047189,35.92719310164043,-4.077977314187725,-4.276795598599529,-4.077171797589524,-4.276339668965059,Ks,84542400,3319fb54e5a06ed024ee2c779d3eea16
1,/Users/rs548/GitHub/lsst-ir-fusion/dmu0/dmu0_VISTA/dmu0_VIDEO/data/20121122/v20121122_00235_st.fit,35.611492,-4.90492,36.12610999045986,36.12799560019295,35.92567612832217,35.927013085064964,-4.260849258430862,-4.458864267061002,-4.26000521655501,-4.458377949976223,Ks,86400000,3311861dca0dc2cc4c591c74ade46697
2,/Users/rs548/GitHub/lsst-ir-fusion/dmu0/dmu0_VISTA/dmu0_VIDEO/data/20121122/v20121122_00088_st_tl.fit,35.43265,-4.72313,36.1259211592099,36.13048720302681,34.91284867786413,34.914758535953574,-4.074792762244068,-5.561089113115699,-4.077214438128979,-5.563516534082509,Ks,216357120,95f818d1b99dcee749b276666480cdc6
3,/Users/rs548/GitHub/lsst-ir-fusion/dmu0/dmu0_VISTA/dmu0_VIDEO/data/20121122/v20121122_00214_st_tl.fit,35.427662,-4.72176,36.12821698635637,36.133033742529975,34.912513728999684,34.914670908547755,-4.074580595511128,-5.559726533947458,-4.07748170314107,-5.562633841232306,Ks,219761280,1fa2bf681b64e2c34cd63976c48ebd6d
4,/Users/rs548/GitHub/lsst-ir-fusion/dmu0/dmu0_VISTA/dmu0_VIDEO/data/20121122/v20121122_00207_st.fit,35.436996,-4.90735,35.94415038681624,35.946083891256066,35.74415693249861,35.74553771326806,-4.25982111275322,-4.458153923454694,-4.259167034486192,-4.457851984006502,Ks,87131520,30c4cf93b9f0f0cf0d3261792887984c
5,/Users/rs548/GitHub/lsst-ir-fusion/dmu0/dmu0_VISTA/dmu0_VIDEO/data/20121122/v20121122_00186_st.fit,35.620817,-4.72418,36.1273099599587,36.12925399598437,35.927356864252125,35.928750217721166,-4.076418668899508,-4.2747963003828655,-4.075634526012312,-4.274362759715051,Ks,86996160,f2d7ce96af8ba0e7dacad9502b130968
6,/Users/rs548/GitHub/lsst-ir-fusion/dmu0/dmu0_VISTA/dmu0_VIDEO/data/20121122/v20121122_00151_st.fit,35.619563,-4.91269,36.12897457417563,36.1309440114325,35.926964033757244,35.92836295863879,-4.25902765584969,-4.4604201350789845,-4.258262828599911,-4.460015583328658,Ks,87891840,b31848fb6f8fda487334ad79edf9f671
7,/Users/rs548/GitHub/lsst-ir-fusion/dmu0/dmu0_VISTA/dmu0_VIDEO/data/20121122/v20121122_00116_st.fit,35.524558,-4.90631,36.03226173409147,36.03414857812651,35.834395481721,35.83573620433609,-4.260776925843634,-4.459607645003357,-4.259978566975024,-4.459159214724387,Ks,84648960,a48b9f8674556a541dc64ae56646d847
8,/Users/rs548/GitHub/lsst-ir-fusion/dmu0/dmu0_VISTA/dmu0_VIDEO/data/20121122/v20121122_00179_st.fit,35.528933,-4.72417,36.036102623233226,36.038019421876925,35.83608127168605,35.83744677984761,-4.075840155220711,-4.27419221211796,-4.075110862928901,-4.273814512007562,Ks,86526720,ab1338e0fca179e1783af7e2d9a78187
9,/Users/rs548/GitHub/lsst-ir-fusion/dmu0/dmu0_VISTA/dmu0_VIDEO/data/20121122/v20121122_00130_st.fit,35.435729,-4.72953,35.94541034612505,35.94739404313425,35.74341133771648,35.7448269875313,-4.0761093665967385,-4.277639042080953,-4.075351693245579,-4.277242337224645,Ks,88286400,2def2dc5fa82d81c006112aa76b725ab


In [26]:
# Save the VISTA overview only when the VISTA loop was actually run.
if RUN_VISTA:
    vista_overview_path = './data/{}_images_overview_{}.csv'.format(SURVEY, SUFFIX)
    v_meta.write(vista_overview_path, overwrite=True)