In [1]:
import astropy.units as u
import numpy as np
from astroquery.simbad import Simbad
import astropy.coordinates as coord

from astropy.io import ascii
from astropy.io import fits
import os.path
# 
import elk
from elk.ensemble import EnsembleLC
from elk.lightcurve import BasicLightcurve

from astropy.table import Table, join, MaskedColumn, vstack, Column
from matplotlib import pyplot as plt
import glob

import numpy as np

from astroML.datasets import fetch_sdss_galaxy_images, fetch_LINEAR_sample, fetch_LINEAR_geneva
from astroML.utils import split_samples

import matplotlib.pyplot as plt

from sklearn.metrics import ConfusionMatrixDisplay, PrecisionRecallDisplay
from sklearn.gaussian_process import GaussianProcessRegressor

from tqdm import tqdm
from scipy.special import entr

from astropy.table import Column
from scipy.stats import entropy as entr
import time
import sys

In [3]:
# Directory that is globbed for the per-cluster '*.fits' light-curve tables.
path='/uufs/astro.utah.edu/common/home/u1363702/notebooks/tess_clusters/TESS_Cluster_Age_ML/light_curves/resampled_fits_tables/'
filenames = glob.glob(path + '*.fits')

# Read one cluster name per line. The context manager guarantees the file
# handle is closed even if reading fails.
with open('cluster_names.txt', 'r') as f:
    names = f.readlines()

# Drop the trailing newline from every entry and skip blank lines: an empty
# name would turn the per-cluster glob pattern into '*.fits' and match every
# file in `path`.
cnames = [line.split('\n')[0] for line in names if line.strip()]

In [5]:
# Flag read by get_cluster_data: when True, every stitched light curve is kept
# whole and its cluster name recorded; when False, that function returns empty
# `data_augmented_lcs` and `names` lists.
use_full_sectors = True

In [6]:

def get_cluster_data(name):
    """Load and stitch every light-curve FITS file belonging to one cluster.

    Files are located with the pattern ``path + name + '*.fits'`` (``path`` and
    ``use_full_sectors`` are read from the notebook's global namespace).

    Parameters
    ----------
    name : str
        Cluster name used as the file-name prefix.

    Returns
    -------
    l_of_cs : list
        One object per file, as returned by ``elk.ensemble.from_fits``.
    l_of__all_lcs : list
        One stitched table per file: the ``corrected_lc`` tables of that
        ensemble, stacked after shifting each one so its median flux matches
        the median flux of the first light curve in the ensemble.
    data_augmented_lcs : list
        The same tables as ``l_of__all_lcs`` when ``use_full_sectors`` is
        True, otherwise an empty list.
    names : list
        The ``callable`` attribute of each ensemble when ``use_full_sectors``
        is True, otherwise an empty list.
    sectors_used : list
        The ``SECTORS`` primary-header value of each file.

    Notes
    -----
    The flux offset is written back into each ``corrected_lc`` table, so the
    objects in ``l_of_cs`` carry the shifted flux as well.
    """
    # glob.escape protects against glob metacharacters in the name; sorting
    # makes the file order (and therefore the row order of every returned
    # list) reproducible, since glob itself returns files in arbitrary order.
    # NOTE(review): this is a prefix match, so a name such as 'ASCC 8' may also
    # pick up files of 'ASCC 81' depending on the file-naming scheme -- confirm.
    cluster_files = sorted(glob.glob(glob.escape(path + name) + '*.fits'))

    sectors_used = []
    l_of_cs = []
    for file in cluster_files:
        # Context manager closes the file even if the header key is missing.
        with fits.open(file) as hdul:
            sectors_used.append(hdul[0].header['SECTORS'])
        l_of_cs.append(elk.ensemble.from_fits(file))

    ############

    l_of__all_lcs = []
    for ensemble in l_of_cs:
        aligned = []
        reference_median = None
        for lc_obj in ensemble.lcs:
            lc = lc_obj.corrected_lc
            lc_median = np.nanmedian(lc['flux'])
            if reference_median is None:
                # The first light curve defines the reference flux level.
                reference_median = lc_median
            else:
                # Remove the median offset relative to the first light curve.
                lc['flux'] = lc['flux'] - (lc_median - reference_median)
            aligned.append(lc)
        l_of__all_lcs.append(vstack(aligned))

    ############

    if use_full_sectors:
        data_augmented_lcs = list(l_of__all_lcs)
        names = [ensemble.callable for ensemble in l_of_cs]
    else:
        data_augmented_lcs = []
        names = []

    ############

    return l_of_cs, l_of__all_lcs, data_augmented_lcs, names, sectors_used

In [7]:
def print_progress_bar(i, total, length=30, start_time=None):
    """Draw a single-line text progress bar on stdout.

    The line starts with a carriage return so successive calls overwrite each
    other in the terminal / cell output.

    Parameters
    ----------
    i : int
        Number of items completed so far.
    total : int
        Total number of items. A non-positive total is shown as 100% done
        (there is nothing to process) instead of dividing by zero.
    length : int, optional
        Width of the bar in characters.
    start_time : float or None, optional
        ``time.time()`` value taken when the work started. When omitted, or
        when ``i`` is not positive, no ETA can be estimated and ``--:--:--``
        is shown.
    """
    percent = i / total if total > 0 else 1.0
    # Clamp so the bar never grows past `length` or gets a negative fill.
    filled = min(max(int(length * percent), 0), length)
    bar = '█' * filled + '-' * (length - filled)

    if start_time is not None and i > 0:
        elapsed = time.time() - start_time
        # Linear extrapolation from the average time per completed item;
        # never negative, so time.gmtime always receives a valid value.
        eta = max((elapsed / i) * (total - i), 0)
        eta_str = time.strftime("%H:%M:%S", time.gmtime(eta))
    else:
        eta_str = "--:--:--"

    sys.stdout.write(f'\rProgress: |{bar}| {round(percent * 100, 1)}% | ETA: {eta_str}')
    sys.stdout.flush()

In [11]:
def make_summary_table(augmented_data, names, sectors, outpath=None):
    """Compute per-light-curve summary statistics and write them to a FITS file.

    For every table in ``augmented_data`` a ``BasicLightcurve`` is built, its
    default statistics are computed, and a one-row table is assembled from a
    fixed subset of those statistics plus four band-summed periodogram powers,
    the flux entropy and the truncated periodogram itself. The rows are stacked,
    a ``Sectors`` column is attached, and the result is written to
    ``<outpath>/<names[0] lower-cased, spaces -> underscores>_summary.fits``,
    overwriting any existing file.

    Parameters
    ----------
    augmented_data : sequence of tables
        Light curves with ``'time'``, ``'flux'`` and ``'flux_err'`` columns.
        This argument is what gets processed; the function no longer reads a
        same-named global variable.
    names : sequence of str
        One name per light curve; ``names[0]`` also names the output file.
    sectors : sequence
        Values for the ``Sectors`` column (one per light curve).
    outpath : str, optional
        Output directory. Defaults to the project's resampled-summary folder.

    Raises
    ------
    ValueError
        If ``augmented_data`` is empty.

    Notes
    -----
    The band sums and the period cut are evaluated on the *period* grid (days).
    Previously the frequency grid (1/day) was compared against day values, so
    e.g. ``SumLSP_10_7_Day_Power`` summed frequencies of 7-10 per day instead
    of periods of 7-10 days.
    """
    # Local imports keep the block self-contained. The entropy import also
    # avoids depending on which of the two `entr` imports at the top of the
    # notebook ran last (scipy.stats.entropy was the one in effect).
    import os
    from scipy.stats import entropy as shannon_entropy

    if len(augmented_data) == 0:
        raise ValueError('make_summary_table: no light curves were provided.')

    if outpath is None:
        outpath = '/uufs/astro.utah.edu/common/home/u1363702/notebooks/tess_clusters/TESS_Cluster_Age_ML/data/resampled_summary_data/'

    stat_columns = [
        'name',
        'rms',
        'std',
        'MAD',
        'sigmaG',
        'skewness',
        'von_neumann_ratio',
        'J_Stetson',
        'max_power',
        'freq_at_max_power',
        'n_peaks',
        'ratio_of_power_at_high_v_low_freq',
        'FAP',
        'max_autocorrelation',
        'time_of_max_autocorrelation'
    ]

    # Period grid in days (loop-invariant, so built once).
    # NOTE(review): assumes lc.periodogram is sampled at frequencies
    # 1 / np.arange(0.04, 11, 0.01), as the original indexing implied --
    # confirm against elk's defaults.
    period_grid = np.arange(0.04, 11, 0.01)

    # Because the lightcurves were smoothed over timescales >10days, don't use those scales.
    usable = period_grid < 10
    periods = period_grid[usable]

    # Masks for the period bands (in days) whose power is summed.
    band_10_7 = (periods < 10) & (periods > 7)
    band_7_4 = (periods < 7) & (periods > 4)
    band_4_1 = (periods < 4) & (periods > 1)
    band_1_p5 = (periods < 1) & (periods > 0.5)

    l_of_stat_tables = []
    start_time = time.time()
    n_curves = len(augmented_data)
    for i, lc_table in enumerate(augmented_data):
        print_progress_bar(i + 1, n_curves, start_time=start_time)

        # NOTE(review): sector=99 looks like a placeholder value -- confirm.
        lc = BasicLightcurve(lc_table['time'],
                             lc_table['flux'],
                             lc_table['flux_err'],
                             sector=99)

        lc.get_stats_using_defaults()

        table = lc.get_stats_table(names[i])[stat_columns]

        periodogram = lc.periodogram[usable]

        # Sum power in specific period bands
        sum_LSP_power_10_7_days = np.sum(periodogram[band_10_7])
        sum_LSP_power_7_4_days = np.sum(periodogram[band_7_4])
        sum_LSP_power_4_1_days = np.sum(periodogram[band_4_1])
        sum_LSP_power_1_p5_days = np.sum(periodogram[band_1_p5])

        # Shannon entropy of the flux
        entropy_val = shannon_entropy(lc_table['flux'].value)

        # Add new features
        table.add_column(Column(sum_LSP_power_10_7_days), name='SumLSP_10_7_Day_Power')
        table.add_column(Column(sum_LSP_power_7_4_days), name='SumLSP_7_4_Day_Power')
        table.add_column(Column(sum_LSP_power_4_1_days), name='SumLSP_4_1_Day_Power')
        table.add_column(Column(sum_LSP_power_1_p5_days), name='SumLSP_1_p5_Day_Power')
        table.add_column(Column(entropy_val), name='Entropy')

        # Store the truncated periodogram as a single array-valued cell.
        table.add_column(Column([periodogram], name='FullPeriodogram'))

        l_of_stat_tables.append(table)

    stat_table = vstack(l_of_stat_tables)
    stat_table['Sectors'] = sectors

    fname = '{}_summary.fits'.format(names[0].replace(" ", "_").lower())

    stat_table.write(os.path.join(outpath, fname), overwrite=True)
    print('\n Done {}!'.format(names[0]))

In [None]:
# Build and save the summary table for every cluster, printing a running
# "done/total" tally after each one. The unpacked names are deliberately left
# unchanged: they live in the notebook's global namespace, where other cells
# may read them.
count = 0
for i in cnames:
    cluster_outputs = get_cluster_data(i)
    l_of_cs, l_of__all_lcs, data_augmented_lcs, names, sectors = cluster_outputs
    make_summary_table(data_augmented_lcs, names, sectors)
    count = count + 1
    print(f'{count}/{len(cnames)}')

Progress: |██████████████████████████████| 100.0% | ETA: 00:00:00
 Done ASCC 116!
1/348
Progress: |██████████████████████████████| 100.0% | ETA: 00:00:00
 Done ASCC 57!
2/348
Progress: |██████████████████████████████| 100.0% | ETA: 00:00:00
 Done ASCC 81!
3/348
Progress: |██████████████████████████████| 100.0% | ETA: 00:00:00
 Done ASCC 8!
4/348
Progress: |██████████████████████████████| 100.0% | ETA: 00:00:00
 Done ASCC 9!
5/348
Progress: |██████████████████████████████| 100.0% | ETA: 00:00:00
 Done BASEL 12!
6/348
Progress: |█████████████-----------------| 44.4% | ETA: 00:00:01