# 1 Data overview

In this notebook we will catalogue all the HSC and VISTA image information required for processing. The HSC image corners will be used to find the relevant VISTA images required for swarping to the HSC pixel base.

In [1]:
import glob

from astropy.table import Table, Column
from astropy.io import fits
from astropy.wcs import WCS

import matplotlib.pyplot as plt

import numpy as np
import time

import os

import hashlib
import yaml
import collections

In [2]:
SUFFIX=time.strftime("%Y%m%d")
print(SUFFIX)

20210322


In [3]:

def data_loc(base=None):
    """Return data locations depending on machine used
    
    Returns
    -------
    str
        Full path to data repository
    """
    if base == None:
        cwd = os.getcwd()
        base = cwd.split('lsst-ir-fusion')[0] + 'lsst-ir-fusion/'
    
    
    overide = base + 'dmu0/data_local.yml'
    standard = base + 'dmu0/data_local_template.yml'
    print(overide)
    if os.path.exists(overide):
        data_loc = yaml.load(open(overide, 'rb'))
    else:
        data_loc = yaml.load(open(standard, 'rb'))
        for k, v in data_loc.items():
            #print(k,v)
            data_loc[k] = v.replace('BASE/', base)
    return data_loc
data_loc = data_loc()

/Users/raphaelshirley/Documents/github/lsst-ir-fusion/dmu0/data_local.yml




In [4]:
RUN_HSC = False
RUN_VISTA = True
RESTART_VISTA = False
SURVEY = 'video'
RUN_PANSTARRS = False

RUN_HASH=False

In [5]:
data_loc

{'vista': '/Users/raphaelshirley/Documents/github/lsst-ir-fusion/dmu0/dmu0_VISTA/',
 'video': '/Users/raphaelshirley/Documents/github/lsst-ir-fusion/dmu0/dmu0_VISTA/dmu0_VIDEO/data/',
 'viking': '/Users/raphaelshirley/Documents/github/lsst-ir-fusion/dmu0/dmu0_VISTA/dmu0_VIKING/data/',
 'vhs': '/Users/raphaelshirley/Documents/github/lsst-ir-fusion/dmu0/dmu0_VISTA/dmu0_VHS/data/',
 'hsc': '/Users/raphaelshirley/Documents/github/lsst-ir-fusion/dmu0/dmu0_HSC/data/',
 'panstarrs': '/Users/raphaelshirley/Documents/github/lsst-ir-fusion/dmu0/dmu0_PanSTARRS/data/',
 'gaia': '/Users/raphaelshirley/Documents/github/lsst-ir-fusion/dmu0/dmu0_GAIA/data/'}

## 1.1 HSC images
First lets look at the HSC images.

In [6]:
images = glob.glob(data_loc['hsc'] + "**/calexp*.fits", recursive=True)

In [7]:
meta = Table()
meta.add_column(Column(data=images, name="file"))

In [8]:
meta[:5]

file
str184
"/Users/raphaelshirley/Documents/github/lsst-ir-fusion/dmu0/dmu0_HSC/data/hsc-release.mtk.nao.ac.jp/archive/filetree/pdr2_dud/deepCoadd-results/HSC-Y/8524/5,5/calexp-HSC-Y-8524-5,5.fits"
"/Users/raphaelshirley/Documents/github/lsst-ir-fusion/dmu0/dmu0_HSC/data/hsc-release.mtk.nao.ac.jp/archive/filetree/pdr2_dud/deepCoadd-results/HSC-Y/8524/5,3/calexp-HSC-Y-8524-5,3.fits"
"/Users/raphaelshirley/Documents/github/lsst-ir-fusion/dmu0/dmu0_HSC/data/hsc-release.mtk.nao.ac.jp/archive/filetree/pdr2_dud/deepCoadd-results/HSC-Y/8524/5,4/calexp-HSC-Y-8524-5,4.fits"
"/Users/raphaelshirley/Documents/github/lsst-ir-fusion/dmu0/dmu0_HSC/data/hsc-release.mtk.nao.ac.jp/archive/filetree/pdr2_dud/deepCoadd-results/HSC-Y/8524/4,5/calexp-HSC-Y-8524-4,5.fits"
"/Users/raphaelshirley/Documents/github/lsst-ir-fusion/dmu0/dmu0_HSC/data/hsc-release.mtk.nao.ac.jp/archive/filetree/pdr2_dud/deepCoadd-results/HSC-Y/8524/4,4/calexp-HSC-Y-8524-4,4.fits"


#### Critical imformation
From each image we need:

1. Corner pixels
2. File sizes
3. Checksums

In [9]:
def file_as_bytes(file):
    with file:
        return file.read()

test_hash = hashlib.md5(file_as_bytes(open(meta[0]['file'], 'rb'))).hexdigest()
print( test_hash)

92a2ea799e73665f538ddb95d9151c51


In [10]:
%%timeit
h = hashlib.md5(file_as_bytes(open(meta[0]['file'], 'rb'))).hexdigest()

252 ms ± 23.7 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


### 1.2.2 Run on all images
Now lets loop over all the images

In [11]:
meta.add_column(Column(data=np.full(len(meta),  np.nan), name='ra_0_0'))
meta.add_column(Column(data=np.full(len(meta),  np.nan), name='ra_0_y'))
meta.add_column(Column(data=np.full(len(meta),  np.nan), name='ra_x_0'))
meta.add_column(Column(data=np.full(len(meta),  np.nan), name='ra_x_y'))
meta.add_column(Column(data=np.full(len(meta),  np.nan), name='dec_0_0'))
meta.add_column(Column(data=np.full(len(meta),  np.nan), name='dec_0_y'))
meta.add_column(Column(data=np.full(len(meta),  np.nan), name='dec_x_0'))
meta.add_column(Column(data=np.full(len(meta),  np.nan), name='dec_x_y'))
meta.add_column(Column(data=np.full(len(meta),  0), name='size'))
meta.add_column(Column(data=np.full(len(meta),  ''), name='hash', dtype='S{}'.format(len(test_hash))))

In [12]:

for im in meta:
    if not RUN_HSC:
        break
    try:
        file = im["file"]
        #print(file)
        hdu = fits.open(file)
        wcs = WCS(hdu[1].header)
        ra, dec = wcs.all_pix2world([0,0,hdu[1].header['NAXIS1'],hdu[1].header['NAXIS1']], 
                            [0,hdu[1].header['NAXIS2'],0,hdu[1].header['NAXIS2']], 1) 
        im['ra_0_0'] = ra[0]
        im['ra_0_y'] = ra[1]
        im['ra_x_0'] = ra[2]
        im['ra_x_y'] = ra[3]
        im['dec_0_0'] = dec[0]
        im['dec_0_y'] = dec[1]
        im['dec_x_0'] = dec[2]
        im['dec_x_y'] = dec[3]   
        im['size'] =  os.stat(im['file']).st_size
        if RUN_HASH:
            im['hash'] = hashlib.md5(file_as_bytes(open(im['file'], 'rb'))).hexdigest()
    except:
        print(im['file'],' failed')

In [13]:
meta[:5].show_in_notebook()

idx,file,ra_0_0,ra_0_y,ra_x_0,ra_x_y,dec_0_0,dec_0_y,dec_x_0,dec_x_y,size,hash
0,"/Users/raphaelshirley/Documents/github/lsst-ir-fusion/dmu0/dmu0_HSC/data/hsc-release.mtk.nao.ac.jp/archive/filetree/pdr2_dud/deepCoadd-results/HSC-Y/8524/5,5/calexp-HSC-Y-8524-5,5.fits",,,,,,,,,0,
1,"/Users/raphaelshirley/Documents/github/lsst-ir-fusion/dmu0/dmu0_HSC/data/hsc-release.mtk.nao.ac.jp/archive/filetree/pdr2_dud/deepCoadd-results/HSC-Y/8524/5,3/calexp-HSC-Y-8524-5,3.fits",,,,,,,,,0,
2,"/Users/raphaelshirley/Documents/github/lsst-ir-fusion/dmu0/dmu0_HSC/data/hsc-release.mtk.nao.ac.jp/archive/filetree/pdr2_dud/deepCoadd-results/HSC-Y/8524/5,4/calexp-HSC-Y-8524-5,4.fits",,,,,,,,,0,
3,"/Users/raphaelshirley/Documents/github/lsst-ir-fusion/dmu0/dmu0_HSC/data/hsc-release.mtk.nao.ac.jp/archive/filetree/pdr2_dud/deepCoadd-results/HSC-Y/8524/4,5/calexp-HSC-Y-8524-4,5.fits",,,,,,,,,0,
4,"/Users/raphaelshirley/Documents/github/lsst-ir-fusion/dmu0/dmu0_HSC/data/hsc-release.mtk.nao.ac.jp/archive/filetree/pdr2_dud/deepCoadd-results/HSC-Y/8524/4,4/calexp-HSC-Y-8524-4,4.fits",,,,,,,,,0,


In [14]:
if RUN_HSC:
    meta.write('./data/hsc_images_overview_{}.csv'.format(SUFFIX), overwrite=True)

## 1.2 VISTA images

Now lets investigate the VISTA images.

In [15]:
v_files = glob.glob(data_loc[SURVEY] + "/**/*.fit", recursive=True)

In [16]:
v_meta = Table()
v_meta.add_column(Column(data=v_files, name="file"))

In [17]:
v_meta.add_column(Column(data=np.full(len(v_meta),  np.nan), name='ra'))
v_meta.add_column(Column(data=np.full(len(v_meta),  np.nan), name='dec'))
v_meta.add_column(Column(data=np.full(len(v_meta),  np.nan), name='ra_0_0'))
v_meta.add_column(Column(data=np.full(len(v_meta),  np.nan), name='ra_0_y'))
v_meta.add_column(Column(data=np.full(len(v_meta),  np.nan), name='ra_x_0'))
v_meta.add_column(Column(data=np.full(len(v_meta),  np.nan), name='ra_x_y'))
v_meta.add_column(Column(data=np.full(len(v_meta),  np.nan), name='dec_0_0'))
v_meta.add_column(Column(data=np.full(len(v_meta),  np.nan), name='dec_0_y'))
v_meta.add_column(Column(data=np.full(len(v_meta),  np.nan), name='dec_x_0'))
v_meta.add_column(Column(data=np.full(len(v_meta),  np.nan), name='dec_x_y'))
v_meta.add_column(Column(data=np.full(len(v_meta),  '        '), name='filter'))
v_meta.add_column(Column(data=np.full(len(v_meta),  0), name='size'))
v_meta.add_column(Column(data=np.full(len(v_meta),  0), name='visit'))
v_meta.add_column(Column(data=np.full(len(v_meta),  0), name='exptime'))
v_meta.add_column(Column(data=np.full(len(v_meta),  ''), name='hash', dtype='S{}'.format(len(test_hash))))
v_meta.add_column(Column(data=np.full(len(v_meta),  np.nan), name='MAGZPT'))
v_meta.add_column(Column(data=np.full(len(v_meta),  np.nan), name='MAGZRR'))
v_meta.add_column(Column(data=np.full(len(v_meta),  np.nan), name='SEEING'))



In [18]:
n_im = 0
if RESTART_VISTA:
    checkpoints = glob.glob('./data/{}_images_overview_temp_*.csv'.format(SURVEY))
    n_im = np.max(np.array([int(t.split('_')[-1][:-4] ) for t in checkpoints] ))
    restart_file = './data/{}_images_overview_temp_{}.csv'.format(SURVEY,n_im)
    print("Restarting with: {}".format(restart_file))
    v_meta = Table.read(restart_file)
    #START_VISTA

In [19]:

for im in v_meta[v_meta['size']==0]:
 
    if not RUN_VISTA:
        break
    file = im["file"]
    #print(file)
    n_im += 1
    try:
        hdu = fits.open(file)
        wcs = WCS(hdu[1].header)
        ra, dec = wcs.all_pix2world([0,0,hdu[1].header['NAXIS1'],hdu[1].header['NAXIS1']], 
                            [0,hdu[1].header['NAXIS2'],0,hdu[1].header['NAXIS2']], 1) 
        this_file = v_meta['file'] == file
        if file.endswith('_tl.fit'):
            v_meta['ra'][this_file] = hdu[1].header['RA']
            v_meta['dec'][this_file] = hdu[1].header['DEC'] 
            v_meta['filter'][this_file] = hdu[1].header['HIERARCH ESO INS FILT1 NAME']
        else:
            v_meta['ra'][this_file] = hdu[0].header['RA']
            v_meta['dec'][this_file] = hdu[0].header['DEC']
            v_meta['filter'][this_file] = hdu[0].header['HIERARCH ESO INS FILT1 NAME']
        v_meta['ra_0_0'][this_file] = ra[0]
        v_meta['ra_0_y'][this_file] = ra[1]
        v_meta['ra_x_0'][this_file] = ra[2]
        v_meta['ra_x_y'][this_file] = ra[3]
        v_meta['dec_0_0'][this_file] = dec[0]
        v_meta['dec_0_y'][this_file] = dec[1]
        v_meta['dec_x_0'][this_file] = dec[2]
        v_meta['dec_x_y'][this_file] = dec[3]   
        v_meta['size'][this_file] =  os.stat(im['file']).st_size
        if RUN_HASH:
            v_meta['hash'][this_file] = hashlib.md5(file_as_bytes(open(im['file'], 'rb'))).hexdigest()
        
        v_meta['visit'][this_file] = hdu[1].header['ESO DET EXP NO']
        v_meta['exptime'][this_file] = hdu[1].header['EXPTIME']
        v_meta['MAGZPT'][this_file] = hdu[1].header['MAGZPT']
        v_meta['MAGZRR'][this_file] = hdu[1].header['MAGZRR']
        v_meta['SEEING'][this_file] = hdu[1].header['SEEING']

    except:
        pass
    if n_im % 10000 == 0:
        v_meta.write('./data/{}_images_overview_temp_{}.csv'.format(SURVEY,n_im), overwrite=True)
        
    

 [astropy.io.fits.verify]
the RADECSYS keyword is deprecated, use RADESYSa. [astropy.wcs.wcs]
the PROJPn keyword is deprecated, use PVi_ma. [astropy.wcs.wcs]
the PROJPn keyword is deprecated, use PVi_ma. [astropy.wcs.wcs]
the PROJPn keyword is deprecated, use PVi_ma. [astropy.wcs.wcs]
the RADECSYS keyword is deprecated, use RADESYSa. [astropy.wcs.wcs]


In [24]:
v_meta[:5].show_in_notebook()

idx,file,ra,dec,ra_0_0,ra_0_y,ra_x_0,ra_x_y,dec_0_0,dec_0_y,dec_x_0,dec_x_y,filter,size,visit,exptime,hash,MAGZPT,MAGZRR,SEEING
0,/Users/raphaelshirley/Documents/github/lsst-ir-fusion/dmu0/dmu0_VISTA/dmu0_VIDEO/data/20121122/v20121122_00164.fit,35.523512,-4.90954,36.03155669770469,36.03298751837698,35.83912465612438,35.84004103666224,-4.266153773810008,-4.457500853364548,-4.265043153691725,-4.456717195732584,Ks,109091520,658729,60,,,,
1,/Users/raphaelshirley/Documents/github/lsst-ir-fusion/dmu0/dmu0_VISTA/dmu0_VIDEO/data/20121122/v20121122_00170.fit,35.428942,-4.90593,35.93707357978604,35.93847751340113,35.744642584201834,35.74553207435302,-4.262619875795639,-4.453967153311885,-4.261482399351048,-4.453156567314746,Ks,109408320,658735,60,,,,
2,/Users/raphaelshirley/Documents/github/lsst-ir-fusion/dmu0/dmu0_VISTA/dmu0_VIDEO/data/20121122/v20121122_00158.fit,35.527642,-4.91272,36.03568796887743,36.03711889042295,35.8432551302478,35.84417157367037,-4.269332535792607,-4.4606796146422285,-4.268221992414202,-4.459896034174582,Ks,109287360,658723,60,,,,
3,/Users/raphaelshirley/Documents/github/lsst-ir-fusion/dmu0/dmu0_VISTA/dmu0_VIDEO/data/20121122/v20121122_00102_st.fit,35.616442,-4.72313,36.12365296903962,36.12552008187017,35.92586958047189,35.92719310164043,-4.077977314187725,-4.276795598599529,-4.077171797589524,-4.276339668965059,Ks,84542400,658667,10,,23.05,0.045,2.018367
4,/Users/raphaelshirley/Documents/github/lsst-ir-fusion/dmu0/dmu0_VISTA/dmu0_VIDEO/data/20121122/v20121122_00159.fit,35.525487,-4.90308,36.033526927001006,36.03495754271935,35.84109650316589,35.84201275593501,-4.2596945858889335,-4.451041666875689,-4.258583809838531,-4.450257852449261,Ks,109255680,658724,60,,,,


In [21]:
if RUN_VISTA:
    v_meta.write('./data/{}_images_overview_{}.csv'.format(SURVEY, SUFFIX), overwrite=True)