In [1]:
import os
import shutil
import subprocess
import uuid

import numpy as np
from astropy.io import fits
from astropy.table import Table
from astropy.table import vstack

In [2]:
# Repository checkout, and the raw-image folder that lives beneath it
root_path = "/home/mike/git/computational_astro/astro_iqa"
data_path = os.path.join(root_path, "data/raw")

# Names of all compressed FITS images (*.fits.fz) present in the raw-image folder
fits_files = [
    entry
    for entry in os.listdir(data_path)
    if entry.endswith(".fits.fz")
]

In [3]:
# Run SExtractor on every raw image that does not have an LDAC catalog yet.
config_file = "xymfhe.sex"
# Catalog file SExtractor leaves in the working directory; presumably this
# name comes from CATALOG_NAME in the config file -- TODO confirm.
sextractor_output = "test.cat"
fits_suffix = ".fits.fz"
for fits_file in fits_files:
    fits_path = os.path.join(data_path, fits_file)
    # Swap only the trailing suffix; str.replace would also rewrite a
    # ".fits.fz" occurring earlier in the path.
    if fits_path.endswith(fits_suffix):
        ldac_path = fits_path[: -len(fits_suffix)] + ".ldac"
    else:
        ldac_path = fits_path + ".ldac"
    if os.path.exists(ldac_path):
        # Catalog already produced by a previous run.
        continue
    # Argument list instead of a shell string: paths are passed verbatim and
    # are never re-parsed by a shell.
    try:
        completed = subprocess.run(["sex", fits_path, "-c", config_file], check=False)
    except OSError as err:
        # The executable could not be started; every later image would fail
        # the same way, so stop here.
        print("Could not launch SExtractor: {}".format(err))
        break
    if completed.returncode != 0 or not os.path.exists(sextractor_output):
        # Do not move a stale catalog from an earlier run onto this image.
        print("SExtractor produced no catalog for {} (exit status {})".format(
            fits_path, completed.returncode))
        continue
    shutil.move(sextractor_output, ldac_path)

In [3]:
def log_scale_data(data, mini, maxi):
    """Clamp ``data`` to ``[mini, maxi]`` and return its base-10 logarithm.

    Parameters
    ----------
    data : array-like supporting element-wise comparison
        Values to rescale.
    mini, maxi : scalar
        Lower and upper bounds. Elements below ``mini`` are replaced by
        ``mini`` and elements above ``maxi`` by ``maxi`` before the
        logarithm is taken. ``mini`` should be strictly positive; a zero or
        negative floor makes ``np.log10`` return ``-inf`` or ``nan``.

    Returns
    -------
    numpy.ndarray
        ``np.log10`` of the clamped values.
    """
    floored = np.where(data < mini, mini, data)
    bounded = np.where(floored > maxi, maxi, floored)
    return np.log10(bounded)

def remove_outliers(data, mini, maxi):
    """Clamp ``data`` to the closed interval ``[mini, maxi]``.

    Elements below ``mini`` are replaced by ``mini`` and elements above
    ``maxi`` by ``maxi``; everything else is passed through unchanged.

    Returns
    -------
    numpy.ndarray
        The clamped values.
    """
    raised = np.where(data < mini, mini, data)
    return np.where(raised > maxi, maxi, raised)

In [5]:
objects_catalog = "som_objects_catalog.hdf5"
fits_catalog = "som_fits_catalog.hdf5"
catalog_path = os.path.join(root_path, "data/processed")
filename = os.path.join(catalog_path, objects_catalog)

# Exposure time used when the matching image HDU has no EXPTIME header card.
default_exptime = 30

# Normalised object tables, one per usable HDU. They are stacked and written
# once after the loop: writing each table to the same HDF5 dataset with
# overwrite=True would leave only the last one in the file.
object_tables = []

# For each ldac file, read each table of each ccd
for ldac_file in [f for f in os.listdir(data_path) if f.endswith(".ldac")]:
    fits_id = os.path.splitext(ldac_file)[0]
    ldac_path = os.path.join(data_path, ldac_file)
    image_path = os.path.join(data_path, fits_id + ".fits.fz")
    # Context managers close both files even when a table raises.
    with fits.open(ldac_path) as ldac, fits.open(image_path) as fits_:
        n_ccd = sum(isinstance(hdu, fits.BinTableHDU) for hdu in ldac)
        # NOTE(review): this rebinds `fits_catalog` (the HDF5 file name set
        # above) to a one-row Table that is rebuilt for every file and never
        # written out -- confirm whether it should be accumulated and saved.
        # NOTE(review): "S2" declares 2-byte strings; confirm the identifiers
        # are not truncated.
        fits_catalog = Table(names=("fits_id", "filename", "n_ccd"), dtype=("S2", "S2", "i2"))
        fits_catalog.add_row([fits_id, fits_id + ".fits.fz", n_ccd])
        for i in range(1, len(ldac)):
            hdu_data = ldac[i].data
            if hdu_data is None or hdu_data.shape[0] == 0:
                continue
            # Table() copies the HDU data by default, so the table remains
            # usable after the file is closed.
            table = Table(hdu_data)
            table.add_column(fits_id, name="FITS_ID", index=0)
            table.add_column(i, name="CCD_ID", index=1)
            # One identifier per object (a scalar would be broadcast to every
            # row), stored as text because a column of UUID objects has no
            # HDF5 representation.
            object_ids = [str(uuid.uuid4()) for _ in range(len(table))]
            table.add_column(object_ids, name="OBJECT_ID", index=2)
            # Assumes HDU i of the LDAC file matches HDU i of the image
            # -- TODO confirm.
            try:
                exptime = fits_[i].header["EXPTIME"]
            except (KeyError, IndexError):
                exptime = default_exptime
            table.add_column(exptime, name="EXPTIME")
            # normalise data before their registration
            try:
                background = table["BACKGROUND"]
                bg_mean = np.mean(background)
                bg_std = np.std(background)
                table["ISO0"] = log_scale_data(table["ISO0"], 0.00001, 10000)
                table["ELLIPTICITY"] = remove_outliers(table["ELLIPTICITY"], 0.00001, 1)
                # EXPTIME is derived from the raw background, so it is
                # computed before BACKGROUND is replaced below.
                table["EXPTIME"] = log_scale_data(exptime * np.abs(background / bg_mean), 0.00001, 30)
                if bg_std > 0:
                    # Standard score: subtract the mean first, then divide
                    # by the spread.
                    background_score = (background - bg_mean) / bg_std
                else:
                    # A single object or a constant background has no spread;
                    # place every object at the mean instead of dividing by 0.
                    background_score = np.zeros(len(table))
                table["BACKGROUND"] = remove_outliers(background_score, -2., 2.)
            except Exception as err:
                # In case one of the ldac tables is empty or corrupted, we
                # skip it -- but say so instead of failing silently.
                print("Skipping HDU {} of {}: {!r}".format(i, ldac_file, err))
                continue
            object_tables.append(table)

if object_tables:
    os.makedirs(catalog_path, exist_ok=True)
    som_objects = vstack(object_tables)
    # NOTE(review): the HDF5 dataset path is the file-system path, as in the
    # original call; kept so existing readers find the table -- confirm.
    som_objects.write(filename, path=filename, format="hdf5", overwrite=True, append=True)

        Use textwrap.indent() instead. [astropy.io.fits.hdu.hdulist]
    Header size is not multiple of 2880: 4096
There may be extra bytes after the last HDU or the file is corrupted. [astropy.io.fits.hdu.hdulist]
