In [1]:
import numpy as np
import astropy
import astropy.io.fits as fits
from astropy.io.fits import getdata
from astropy.table import Table
import healpy as hp
from astropy.coordinates import SkyCoord
from astropy import units as u
import os
import parsl
from parsl import python_app, bash_app
import matplotlib.pyplot as plt
from tqdm import tqdm
import sys
from time import sleep

parsl.clear()
parsl.load()

<parsl.dataflow.dflow.DataFlowKernel at 0x7facbc5fd2e0>

In [2]:
def join_cat(data, label_coluns, t_format, outfile):

    col0 = fits.Column(
        name=label_coluns[0], format=t_format[0], array=data[:, 0])
    col1 = fits.Column(
        name=label_coluns[1], format=t_format[1], array=data[:, 1])
    col2 = fits.Column(
        name=label_coluns[2], format=t_format[2], array=data[:, 2])
    col3 = fits.Column(
        name=label_coluns[3], format=t_format[3], array=data[:, 3])
    col4 = fits.Column(
        name=label_coluns[4], format=t_format[4], array=data[:, 4])
    col5 = fits.Column(
        name=label_coluns[5], format=t_format[5], array=data[:, 5])
    col6 = fits.Column(
        name=label_coluns[6], format=t_format[6], array=data[:, 6])
    col7 = fits.Column(
        name=label_coluns[7], format=t_format[7], array=data[:, 7])
    cols = fits.ColDefs([col0, col1, col2, col3, col4, col5, col6, col7])
    tbhdu = fits.BinTableHDU.from_columns(cols)
    tbhdu.writeto(outfile, overwrite=True)


def split_files(in_file, ra_str, dec_str, nside, path):

    os.system('mkdir -p ' + path)

    data = getdata(in_file)
    t = Table.read(in_file)
    label_columns = t.colnames
    t_format = []
    for i in label_columns:
        t_format.append(t[i].info.dtype)
    HPX = hp.ang2pix(nside, data[ra_str],
                     data[dec_str], nest=True, lonlat=True)

    HPX_un = np.unique(HPX)
    for i, j in enumerate(HPX_un):
        cond = (HPX == j)
        data_ = data[cond]
        col0 = fits.Column(
            name=label_columns[0], format=t_format[0], array=data_[label_columns[0]])
        col1 = fits.Column(
            name=label_columns[1], format=t_format[1], array=data_[label_columns[1]])
        col2 = fits.Column(
            name=label_columns[2], format=t_format[2], array=data_[label_columns[2]])
        col3 = fits.Column(
            name=label_columns[3], format=t_format[3], array=data_[label_columns[3]])
        col4 = fits.Column(
            name=label_columns[4], format=t_format[4], array=data_[label_columns[4]])
        col5 = fits.Column(
            name=label_columns[5], format=t_format[5], array=data_[label_columns[5]])
        col6 = fits.Column(
            name=label_columns[6], format=t_format[6], array=data_[label_columns[6]])
        col7 = fits.Column(
            name=label_columns[7], format=t_format[7], array=data_[label_columns[7]])
        cols = fits.ColDefs([col0, col1, col2, col3, col4, col5, col6, col7])
        tbhdu = fits.BinTableHDU.from_columns(cols)
        tbhdu.writeto(path + str(j) + '.fits', overwrite=True)
    ipix_cats = [path + str(j) + '.fits' for j in HPX_un]
    return ipix_cats


@python_app
def clean_input_cat(file_name, ra_str, dec_str, nside):

    output_file = file_name.split('.')[0] + '_clean.fits'

    data = getdata(file_name)
    t = Table.read(file_name)
    label_columns = t.colnames
    t_format = []
    for i in label_columns:
        t_format.append(t[i].info.dtype)
    HPX = hp.ang2pix(nside, data[ra_str],
                     data[dec_str], nest=True, lonlat=True)

    HPX_idx_sort = np.argsort(HPX)

    HPX_sort = [HPX[i] for i in HPX_idx_sort]
    data_sort = data[HPX_idx_sort]

    a, HPX_idx = np.unique(HPX_sort, return_inverse=True)

    # original order not preserved!
    HPX_un, HPX_counts = np.unique(HPX_sort, return_counts=True)

    HPX_single_star_pix = [
        j for i, j in enumerate(HPX_un) if HPX_counts[i] < 2]

    data_clean = np.array([data_sort[:][i] for i, j in enumerate(HPX_idx) if
                           HPX_un[j] in HPX_single_star_pix])

    col0 = fits.Column(
        name=label_columns[0], format=t_format[0], array=data_clean[:, 0])
    col1 = fits.Column(
        name=label_columns[1], format=t_format[1], array=data_clean[:, 1])
    col2 = fits.Column(
        name=label_columns[2], format=t_format[2], array=data_clean[:, 2])
    col3 = fits.Column(
        name=label_columns[3], format=t_format[3], array=data_clean[:, 3])
    col4 = fits.Column(
        name=label_columns[4], format=t_format[4], array=data_clean[:, 4])
    col5 = fits.Column(
        name=label_columns[5], format=t_format[5], array=data_clean[:, 5])
    col6 = fits.Column(
        name=label_columns[6], format=t_format[6], array=data_clean[:, 6])
    col7 = fits.Column(
        name=label_columns[7], format=t_format[7], array=data_clean[:, 7])
    cols = fits.ColDefs([col0, col1, col2, col3, col4, col5, col6, col7])
    tbhdu = fits.BinTableHDU.from_columns(cols)
    tbhdu.writeto(output_file, overwrite=True)

    # return HPX_un, HPX_sort, HPX_idx, HPX_counts, data_sort, label_columns, t_format
    # return label_columns, t_format


In [3]:
ipix_cats = split_files('results/des_mockcat_for_detection.fits', 'ra',
                        'dec', 64, 'results/hpx_cats/')

mkdir: invalid option -- 'r'
Try 'mkdir --help' for more information.


In [None]:
# for i in ipix_cats:
#   clean_input_cat(i, 'ra', 'dec', 2**17)
futures = list()

# Cria uma Progressbar (Opcional)
with tqdm(total=len(ipix_cats), file=sys.stdout) as pbar:
    pbar.set_description("Submit Parsls Tasks")

    # Submissão dos Jobs Parsl
    for i in ipix_cats:
        # os.register_at_fork(after_in_child=lambda: _get_font.cache_clear())
        futures.append(
            clean_input_cat(i, 'ra', 'dec', 2**17)
        )

        pbar.update()
# Fim da submissão dos Jobs,


# Espera todas as tasks Parsl terminarem
# Este loop fica monitarando as parsl.futures
# Até que todas tenham status done.
# Esse bloco todo é opicional

# Progressbar para acompanhar as parsl.tasks.
print("Tasks Done:")
with tqdm(total=len(futures), file=sys.stdout) as pbar2:
    # is_done é um array contendo True ou False para cada task
    # is_done.count(True) retorna a quantidade de tasks que já terminaram.
    is_done = list()
    done_count = 0
    while is_done.count(True) != len(futures):
        is_done = list()
        for f in futures:
            is_done.append(f.done())

        # Só atualiza a pbar se o valor for diferente.
        if is_done.count(True) != done_count:
            done_count = is_done.count(True)
            # Reset é necessário por que a quantidade de iterações
            # é maior que a quantidade de jobs.
            pbar2.reset(total=len(futures))
            # Atualiza a pbar
            pbar2.update(done_count)

        if done_count < len(futures):
            sleep(3)


Submit Parsls Tasks: 100%|██████████| 135/135 [00:00<00:00, 673.24it/s]
Tasks Done:
  0%|          | 0/135 [00:00<?, ?it/s]          

In [None]:
# HPX_single_star_pix = [j for i, j in enumerate(HPX_un) if HPX_counts[i] < 2]

# data_clean = np.array([data_sort[:][i] for i, j in enumerate(HPX_idx) if
#                       HPX_un[j] in HPX_single_star_pix])

# print(data_clean[0:20,1])
plt.scatter(data_sort['ra'], data_sort['dec'], c='b', s=10)
plt.scatter(data_clean[0:10], data_clean[0:10], c='r', s=3)
plt.show()

In [None]:
join_cat(data_clean, label_columns, t_format, 'cat_clean.fits')

In [None]:
#TODO: make a few plots showing the results