In [1]:
%matplotlib inline
import matplotlib.pyplot as plt

import os
import json
import numpy as np
# Location of the visit list; the file holds one JSON document per line.
data_dir = '/project/danielsf'
valid_visit_path = os.path.join(data_dir, 'valid_hsc_visits_DM_20525.txt')
assert os.path.isfile(valid_visit_path)

# Decode every line of the file and collect the results in file order.
valid_data_id_list = []
with open(valid_visit_path, 'r') as in_file:
    valid_data_id_list.extend(json.loads(raw_line) for raw_line in in_file)

In [2]:
# Sanity check: every (visit, ccd) pair may occur only once in the list.
# Adding a pair must therefore always grow the set by one element.
unq_id = set()
for entry in valid_data_id_list:
    n_seen = len(unq_id)
    unq_id.add((entry['visit'], entry['ccd']))
    assert len(unq_id) > n_seen

In [3]:
# Number of distinct (visit, ccd) pairs collected in unq_id.
len(unq_id)

44496

In [4]:
# Total number of data IDs read from the visit file; it should equal
# len(unq_id) when no (visit, ccd) pair is repeated.
len(valid_data_id_list)

44496

In [5]:
from lsst.daf.persistence import Butler
# Alternative repository, currently disabled:
#repo_dir = '/datasets/hsc/repo/rerun/DM-13666/UDEEP'
# Repository to read from. The same path is also handed to every worker
# process, each of which opens its own Butler on it.
repo_dir = '/datasets/hsc/repo/rerun/RC/w_2019_30/DM-20525'
# NOTE(review): this notebook-level butler is not referenced by any of the
# cells visible here -- confirm whether it is still needed.
butler = Butler(repo_dir)

In [6]:
import time
from astropy.coordinates import SkyCoord
from astropy import units as u

In [7]:
# Show the first data ID to see which keys each entry carries.
print(valid_data_id_list[0])

{'visit': 318, 'ccd': 0, 'filter': 'HSC-Y'}


In [8]:
def extent_from_data_id(data_id_list, output_dict, my_lock, repo_dir):
    """
    Measure the sky extent of each data ID from its 'src' catalog.

    For every data ID the 'src' dataset is fetched through a Butler opened
    on ``repo_dir``.  The minimum and maximum of its ``coord_ra`` and
    ``coord_dec`` columns are recorded together with the midpoint of each
    range.  When all data IDs have been processed, one array per quantity
    is appended to the matching entry of ``output_dict`` while ``my_lock``
    is held, so the rows contributed by a single call stay aligned across
    all keys.

    Parameters
    ----------
    data_id_list : list of dict
        Data IDs; each must provide the keys 'visit', 'ccd' and 'filter'.
    output_dict : mapping of str to list-like
        Must already hold an object supporting ``append`` under each of
        'ccd', 'visit', 'filter', 'ra_center', 'dec_center', 'ra_min',
        'dec_min', 'ra_max' and 'dec_max'.
    my_lock : context manager
        Lock held while the results are appended to ``output_dict``.
    repo_dir : str
        Repository path passed to ``Butler``.

    Returns
    -------
    None
        Results are delivered through ``output_dict`` only.

    Notes
    -----
    * Coordinates are stored exactly as found in the catalog columns
      (presumably radians -- TODO confirm against the 'src' schema).
    * NOTE(review): the RA extent is a plain min/max, so a catalog that
      straddles the RA wrap-around would get a meaningless extent and
      center -- confirm that no input field does.
    * A progress line (pid, count, elapsed hours, hours per data ID) is
      printed after every 500 data IDs.
    """
    t_start = time.time()
    local_butler = Butler(repo_dir)

    n_data = len(data_id_list)
    ccd = np.zeros(n_data, dtype=int)
    visit = np.zeros(n_data, dtype=int)
    # Let numpy size the string dtype from the data.  A fixed 5-character
    # dtype silently truncates longer filter names such as 'HSC-R2'.
    hsc_filter = np.array([data_id['filter'] for data_id in data_id_list],
                          dtype=str)
    ra_center = np.zeros(n_data, dtype=float)
    dec_center = np.zeros(n_data, dtype=float)
    ra_min = np.zeros(n_data, dtype=float)
    ra_max = np.zeros(n_data, dtype=float)
    dec_min = np.zeros(n_data, dtype=float)
    dec_max = np.zeros(n_data, dtype=float)

    for ii, data_id in enumerate(data_id_list):
        src = local_butler.get('src', dataId=data_id)
        ra_col = src['coord_ra']
        dec_col = src['coord_dec']

        ccd[ii] = data_id['ccd']
        visit[ii] = data_id['visit']
        ra_min[ii] = ra_col.min()
        ra_max[ii] = ra_col.max()
        dec_min[ii] = dec_col.min()
        dec_max[ii] = dec_col.max()
        # The center is the midpoint of the bounding range, not a mean
        # over the sources.
        ra_center[ii] = 0.5*(ra_max[ii]+ra_min[ii])
        dec_center[ii] = 0.5*(dec_max[ii]+dec_min[ii])

        n_done = ii + 1
        if n_done%500 == 0:
            duration = (time.time()-t_start)/3600.0
            pred = duration/n_done
            print(os.getpid(),n_done,duration,pred)

    # Append everything under a single lock acquisition so that chunks
    # from different callers cannot interleave between keys.
    with my_lock:
        output_dict['ccd'].append(ccd)
        output_dict['visit'].append(visit)
        output_dict['filter'].append(hsc_filter)
        output_dict['ra_center'].append(ra_center)
        output_dict['dec_center'].append(dec_center)
        output_dict['ra_min'].append(ra_min)
        output_dict['dec_min'].append(dec_min)
        output_dict['ra_max'].append(ra_max)
        output_dict['dec_max'].append(dec_max)
    

In [9]:
import multiprocessing

# Fan the data IDs out to worker processes in fixed-size chunks and gather
# the per-chunk arrays in manager-backed lists shared with the workers.
# Chunks are appended in the order the workers finish, not in input order.
max_workers = 4        # maximum number of workers alive at the same time
d_data_id = 2000       # number of data IDs handed to each worker
poll_interval = 1.0    # seconds to wait between checks for a free slot

mgr = multiprocessing.Manager()
output_dict = mgr.dict()
for k in ['ccd', 'visit', 'filter', 'ra_center',
         'dec_center', 'ra_min', 'dec_min',
         'ra_max', 'dec_max']:

    output_dict[k] = mgr.list()

my_lock = mgr.Lock()
p_list = []          # workers that may still be running
finished_list = []   # workers already seen to have exited
for i_start in range(0,len(valid_data_id_list), d_data_id):
    p = multiprocessing.Process(target=extent_from_data_id,
                                args=(valid_data_id_list[i_start:i_start+d_data_id],
                                      output_dict, my_lock, repo_dir))
    p.start()
    p_list.append(p)
    # Wait for a free slot before launching the next chunk.  Sleep between
    # polls so the notebook kernel does not spin at full CPU while waiting.
    while len(p_list)>=max_workers:
        still_running = []
        for worker in p_list:
            if worker.exitcode is None:
                still_running.append(worker)
            else:
                finished_list.append(worker)
        p_list = still_running
        if len(p_list)>=max_workers:
            time.sleep(poll_interval)

for worker in p_list:
    worker.join()
finished_list.extend(p_list)

# A worker that died never appended its chunk, so the output would be
# silently incomplete; fail loudly instead of reporting success.
failed_workers = [(worker.pid, worker.exitcode)
                  for worker in finished_list
                  if worker.exitcode != 0]
if failed_workers:
    raise RuntimeError('%d of %d worker processes failed (pid, exitcode): %s'
                       % (len(failed_workers), len(finished_list), failed_workers))

print('all done')
with open('/project/danielsf/DM-20525-spatial-log.txt','w') as out_file:
    out_file.write('all done\n')

7653 500 0.012919162445598178 2.5838324891196356e-05
7656 500 0.01643142408794827 3.286284817589654e-05
7659 500 0.019451782239807976 3.890356447961595e-05
7653 1000 0.025986652970314026 2.5986652970314027e-05
7656 1000 0.03561800009674496 3.561800009674496e-05
7662 500 0.036186169452137415 7.237233890427483e-05
7653 1500 0.03849785155720181 2.5665234371467874e-05
7659 1000 0.03962838285499149 3.962838285499149e-05
7653 2000 0.05172263264656067 2.5861316323280336e-05
7656 1500 0.05227557990286085 3.485038660190723e-05
7662 1000 0.06046123312579261 6.0461233125792606e-05
7659 1500 0.06186602320935991 4.124401547290661e-05
7656 2000 0.07110524608029259 3.55526230401463e-05
7704 500 0.03287602424621582 6.575204849243164e-05
7662 1500 0.08953588869836596 5.96905924655773e-05
7737 500 0.021416866580645243 4.283373316129049e-05
7659 2000 0.09653404626581404 4.826702313290702e-05
7769 500 0.01876058320204417 3.752116640408834e-05
7704 1000 0.06663344542185466 6.663344542185466e-05
7737 1000 0

In [10]:
# Directory that will receive the HDF5 output; it must already exist.
out_dir = '/project/danielsf'
assert os.path.isdir(out_dir), 'output directory does not exist: %s' % out_dir

In [11]:
# Target file for the extents.  Stop here if it is already present so that
# an existing file is not overwritten by the write below.
out_name = os.path.join(out_dir, 'valid_hsc_visit_extent_DM_20525.h5')
assert not os.path.exists(out_name), 'refusing to overwrite existing file: %s' % out_name

In [12]:
import h5py
# Write one HDF5 dataset per column.  The per-worker chunks stored under
# each key are joined into a single array first; the filter names are
# converted to bytes before being stored.
column_names = ('ccd', 'visit', 'filter', 'ra_center',
                'dec_center', 'ra_min', 'dec_min',
                'ra_max', 'dec_max')
with h5py.File(out_name, 'w') as out_file:
    for column in column_names:
        joined = np.concatenate(output_dict[column])
        out_file.create_dataset(
            column,
            data=joined.astype(bytes) if column == 'filter' else joined)

In [13]:
# Read the file back and report how many rows the 'filter' dataset holds.
with h5py.File(out_name, 'r') as in_file:
    filter_column = in_file['filter'][()]
    print(len(filter_column))

44496


In [None]:
# Echo the path of the HDF5 file.
out_name