In [1]:
# Standard libraries
import sys
import math
import random
import time
import os

# Third-party imports
import numpy as np
import pandas as pd
import pickle as pkl

import matplotlib.pyplot as plt

import tensorflow as tf

import multiprocess
import psutil

import importlib
from yapf.yapflib.yapf_api import FormatCode       

# Project imports
import ephesus
import utils
from const import *

In [2]:
# Verbosity of the e-mail notifications sent with
# utils.send_task_comple_email (levels are cumulative, tested with >=)
# 0 = off
# 1 = one notification at the very end, once the padded cuts are saved
# 2 = additionally one notification after each processing phase
# 3 = additionally percent-complete updates while the injection is running
verbo = 1

### Import Light Curves

In [3]:
# Only a subset of the roughly 500,000 available light curves is used
numbe_of_curve = 100000

# Start from the name of every sector file ...
files = utils.retur_secto_files()
# ... draw the indices of a random sample among them ...
sampl = utils.retur_rando_sampl(numbe_of_curve, len(files))
# ... and keep only the file names that were drawn
files = [files[chose] for chose in sampl]

In [4]:
# Number of worker processes handed to every multiprocess.Pool in this notebook
proces_number = 30

In [5]:
# Read the data of every sampled file, spreading the work over the pool
with multiprocess.Pool(processes=proces_number) as pool:
    raw_fits_data = pool.map(utils.retur_fits_data, files)

# The file names are not needed any more; release them for memory space
del files

In [6]:
# Phase notification (verbosity 2 and above): the FITS data has been loaded
if verbo >= 2:
    utils.send_task_comple_email('Fits Data')

### Interpolate and Format Light Curves

In [7]:
# Interpolation settings (max_time_gap and spln_type are reused when the
# curves are cut further down)
max_time_gap = 10  # mins
caden = 2  # mins
spln_type = 'cubic'

for i in range(len(raw_fits_data)):
    # Insert 'inter_spot' for later interpolation, then interpolate each cut;
    # the interpolated curve replaces the raw one in the same list slot
    raw_fits_data[i] = utils.inter_curve(
        utils.inser_inter_spot(raw_fits_data[i], max_time_gap, caden),
        spln_type)

# Pad every curve at the front to a common length. dtype=object is required
# because the last row of each curve carries a metadata dictionary (see the
# item assignments below).
curve = tf.keras.preprocessing.sequence.pad_sequences(raw_fits_data,
                                                       padding='pre',
                                                       dtype=object)

# Delete raw_fits_data to free up memory space
del raw_fits_data

for i in range(len(curve)):
    # Locate the end of the front padding once per curve and reuse it for
    # both metadata entries (the metadata row itself is excluded from the
    # search and from the length)
    initi_paddi = utils.find_start(curve[i, :-1])
    curve[i, -1, 1]['initi_paddi'] = initi_paddi
    curve[i, -1, 1]['raw_curve_lengt'] = len(curve[i, initi_paddi:-1, 1])

In [8]:
# Phase notification (verbosity 2 and above): interpolation and padding done
if verbo >= 2:
    utils.send_task_comple_email('Inter and Prep')

### Mark TOIs, EBs, and Stellar Parameters

In [9]:
# When True, the curves marked as TOIs are removed from the data set
# (see the utils.remov_TOI call below)
remov_TOI = True

In [10]:
# Load the TOI table and pull out the TIC ID of every TOI
toi_full = pd.read_csv(f'{tess_metad_path}toi.csv')
toi_id = toi_full.loc[:, 'TIC ID'].to_numpy()
# Mark the TOIs in the light curve dataset (the return value is not used,
# so the marking presumably happens on `curve` itself)
utils.mark_TOI(curve, toi_id)

In [11]:
# Remove the TOIs when requested; `curve` is rebound to whatever
# utils.remov_TOI returns, so re-running this cell works on the already
# filtered data
if remov_TOI:
    curve = utils.remov_TOI(curve)

In [12]:
# Table of stellar parameters, looked up by TIC ID (column `tici`)
TIC8 = pd.read_csv(f'{tess_metad_path}nomi_miss_2min_info.csv')

# TIC8 column -> key under which the value is stored in the curve metadata
# (stellar radius, mass, effective temperature, and magnitude)
stell_colum = (
    ('radistar', 'stell_radiu'),
    ('massstar', 'stell_mass'),
    ('tmptstar', 'stell_effec_tempe'),
    ('tmag', 'stell_magni'),
)

for i in range(len(curve)):
    TIC_ID = curve[i, -1, 1]['tic_id']
    # Find current TIC ID in TIC8
    star_infor = TIC8[TIC8.tici == TIC_ID]
    # Make sure TIC ID is in TIC8
    if len(star_infor):
        # Copy each parameter from the first matching row, leaving the
        # metadata entry untouched when the table has no value for it.
        # pd.notna is used instead of comparing str(value) with 'nan' so
        # that every missing-value marker (NaN, None, pd.NA) is skipped.
        for colum, stell_key in stell_colum:
            stell_value = star_infor[colum].iloc[0]
            if pd.notna(stell_value):
                curve[i, -1, 1][stell_key] = stell_value

# Delete TIC8 for memory space
del TIC8

In [13]:
# Phase notification (verbosity 2 and above): TOI and stellar parameter
# marking is done
if verbo >= 2:
    utils.send_task_comple_email('TOI')

### Calculate RMS

In [14]:
# Store the RMS of every light curve in the metadata entry held in its
# last row
for singl_curve in curve:
    singl_curve[-1, 1]['rms'] = utils.calcu_rms(singl_curve)

### Injecting

In [15]:
# The set of curves is divided into 100 equally sized sections, so both
# settings below are percentages of the whole data set:
# plane_perce is the percent of curves that receive a planet-only injection
# plane_moon_perce is the percent of curves that receive a planet and moon
# injection
# Curves beyond plane_perce + plane_moon_perce percent get no injection.

plane_perce = 20
plane_moon_perce = 70

In [16]:
# Divide Curve: number of curves in one percent of the data set
divid = len(curve) // 100

# Number of curves in each injection group (0 when the group is disabled)
numbe_plane_injec = plane_perce * divid
numbe_plane_moon_injec = plane_moon_perce * divid

# Index of planet injections: the first numbe_plane_injec curves.
# Always defined (possibly empty) so later cells never hit a NameError.
plane_index = range(0, numbe_plane_injec)

# Index of planet and moon injections: the curves directly after the
# planet-only ones
plane_moon_index = range(numbe_plane_injec,
                         numbe_plane_injec + numbe_plane_moon_injec)

# One past the index of the last injected curve. This must cover BOTH groups:
# using plane_moon_perce * divid alone stops short by numbe_plane_injec
# whenever planets are injected too, leaving the tail of the planet and moon
# group without an injection.
last_injec_numbe = numbe_plane_injec + numbe_plane_moon_injec

In [17]:
# A checkpoint is written every curve_save_numbe injected curves
curve_save_numbe = 10000
curre_save_index = 0

# curve_save_index holds the chunk boundaries: consecutive entries are the
# start and the end (exclusive) of one injection chunk
if last_injec_numbe < 4 * curve_save_numbe:
    # Fewer than 4 times the minimum save amount: run the whole injection
    # as a single chunk
    curve_save_index = [0, last_injec_numbe]
else:
    # Otherwise place a boundary every curve_save_numbe curves and close the
    # list with the end of the injected range
    curve_save_index = []
    while curre_save_index < last_injec_numbe:
        curve_save_index.append(curre_save_index)
        curre_save_index += curve_save_numbe
    curve_save_index.append(last_injec_numbe)

In [18]:
%%capture

# Set up save flag; it is switched on below only when the injection runs in
# more than one chunk
save = False

# Arguments for utils.injec_signa, one tuple per curve to inject.
# injec_argum[k] has to line up with curve[k] because the results are written
# back into `curve` with the same slice bounds that select the arguments.
injec_argum = []

# Planet injection setup
plane_max_numbe = 1
moon_max_numbe = 0
type_orbit_archi = 'plan'

# Mirror the plane_moon_perce guard below: queue planet injections only when
# some were requested, so a missing or stale plane_index is never used
if plane_perce:
    for i in plane_index:
        injec_argum.append(
            (curve[i], plane_max_numbe, moon_max_numbe, type_orbit_archi))

# Planet and moon injection setup
plane_max_numbe = 1
moon_max_numbe = 1
type_orbit_archi = 'planmoon'
separ_plane_moon = True
anima_path = None

# Setup initial save file: an empty placeholder so the first checkpoint has
# a "previous version" to remove
if len(curve_save_index) > 2:
    with open(f'{main_path}tempo.txt', 'w') as f:
        f.write('')
    filen = f'{main_path}tempo.txt'
    save = True

if plane_moon_perce:
    # Format the arguments for multiprocessing
    for i in plane_moon_index:
        injec_argum.append((curve[i], plane_max_numbe, moon_max_numbe,
                            type_orbit_archi, separ_plane_moon, anima_path))


# Inject chunk by chunk; consecutive entries of curve_save_index are the
# bounds of one chunk
for i in range(len(curve_save_index) - 1):
    with multiprocess.Pool(proces_number) as pool:
        curve[
            curve_save_index[i]:curve_save_index[i + 1]] = pool.starmap(
                utils.injec_signa,
                injec_argum[curve_save_index[i]:curve_save_index[i + 1]])
        if save:
            if verbo >= 3:
                # Send an update
                utils.send_task_comple_email(
                    f'{((curve_save_index[i + 1] - 1) / (curve_save_index[-1] - 1)):.1%} \
of the injection is complete')
            # Remove previous version
            os.remove(filen)
            # Save new version (the whole `curve` array, not just the chunk)
            filen = f'{xom_data_path}injec_curve/curve-parti-{int(time.time())}.pkl'
            with open(filen, 'wb') as f:
                pkl.dump(curve, f)

if save:
    # Rename the final injected curve
    os.rename(
        filen,
        f'{xom_data_path}injec_curve/curve-full-{int(time.time())}.pkl')

curve = np.array(curve)

In [19]:
# A flux of exactly 0 would be viewed as padding and therefore ignored by
# the masking, so every such value is nudged by a tiny random amount
EPSIL = 1e-10
for i in range(len(curve)):
    initi_paddi = curve[i, -1, 1]['initi_paddi']
    # Visit the samples after the initial padding only; the last row is
    # left out because it holds the metadata
    final_index = initi_paddi + len(curve[i, initi_paddi:-1])
    for ii in range(initi_paddi, final_index):
        if curve[i, ii, 1] == 0:
            curve[i, ii, 1] += EPSIL * random.random()

### Detrending

### Cutting, Interpolating, and Padding

In [20]:
stand_lengt = 1901
min_lengt = 600

list_cuts = []
for i in range(len(curve)):
    # Cut a copy of the curve. np.copy duplicates only the array, so the
    # metadata dictionary in the last row is copied separately.
    pre_cut = np.copy(curve[i])
    pre_cut[-1, 1] = pre_cut[-1, 1].copy()
    # A single list collects the time span of every cut that is kept. It is
    # stored in the metadata before cutting so that (presumably) all cuts end
    # up referring to this same list object.
    cut_times = []
    pre_cut[-1, 1]['cut_times'] = cut_times
    pre_cut = utils.cut_curve(pre_cut.tolist(), max_time_gap, min_lengt,
                              stand_lengt)
    # Cut number, counting only the cuts that are kept
    cut_numbe = 0
    for cut in pre_cut:
        # Skip all cuts of only [0,0]
        if not cut[-2, 0]:
            continue
        cut_infor = cut[-1, 1]
        # Start from "nothing injected in this cut" ...
        cut_infor['plane_moon_cut_injec'] = cut_infor['unmod_plane_moon_cut_injec'] = False
        cut_infor['plane_cut_injec'] = False
        # ... then look up the actual status for the injected architecture
        archi = cut_infor['type_orbit_archi']
        if archi == 'planmoon':
            cut_infor['plane_moon_cut_injec'] = cut_infor['unmod_plane_moon_cut_injec'] = \
                utils.retur_curve_injec_statu(cut)
        elif archi == 'plan':
            cut_infor['plane_cut_injec'] = utils.retur_curve_injec_statu(cut)
        cut_infor['cut_trans'] = (cut_infor['plane_moon_cut_injec']
                                  or cut_infor['plane_cut_injec'])
        cut_infor['cut_numbe'] = cut_numbe
        cut_infor['inter_type'] = spln_type
        # Position of the cut's first sample within the full curve
        cut_infor['cut_start_index'] = utils.binar_searc(
            curve[i, :-1, 0], cut[utils.find_start(cut), 0])
        # Assign cut to list containing the rest of the cuts
        list_cuts.append(cut.tolist())
        # Increase cut number
        cut_numbe += 1
        # Record the first and the last time stamp of the cut
        cut_times.append([cut[utils.find_start(cut), 0], cut[-2, 0]])
    # Assign cut times to the curve
    curve[i, -1, 1]['cut_times'] = cut_times

# Pad every cut to the standard length
padde_cuts = tf.keras.preprocessing.sequence.pad_sequences(list_cuts,
                                                           maxlen=stand_lengt,
                                                           dtype=object)

### Calculate SNR

In [22]:
# Calculate the Signal to Noise ratio of every cut flagged as containing a
# planet and moon injection
for padde_cut in padde_cuts:
    cut_infor = padde_cut[-1, 1]
    if not cut_infor['plane_moon_cut_injec']:
        continue
    # Find what proportion the cut is of the initial curve: non-zero flux
    # samples in the cut over the length of the raw curve
    numbe_nonze = np.count_nonzero(padde_cut[:-1, 1] != 0)
    propo = numbe_nonze / cut_infor['raw_curve_lengt']
    noise = cut_infor['rms'] * propo * cut_infor['plane_trans_durat'] ** (1 / 2)
    cut_infor['snr'] = cut_infor['max_ampli'] / noise

In [23]:
# Remove cuts whose snr does not exceed the bound
SNR_MINIM_BOUND = 35
# Find all the usable cuts: those without an snr (None) are always kept,
# the others only when their snr is strictly above the bound.
# `is None` is used for the singleton test so the check cannot be affected
# by how the stored value implements ==.
usabl_cuts = [
    i for i in range(len(padde_cuts))
    if padde_cuts[i, -1, 1]['snr'] is None
    or padde_cuts[i, -1, 1]['snr'] > SNR_MINIM_BOUND
]

padde_cuts = padde_cuts[usabl_cuts]

### Save Data

In [None]:
# Tally the cuts by what was injected into them; a cut counts as planet and
# moon first, as planet second, and as raw otherwise
numbe_raw_cuts = 0
numbe_plane_cuts = 0
numbe_plane_moon_cuts = 0
for padde_cut in padde_cuts:
    cut_infor = padde_cut[-1, 1]
    if cut_infor['plane_moon_cut_injec']:
        numbe_plane_moon_cuts += 1
    elif cut_infor['plane_cut_injec']:
        numbe_plane_cuts += 1
    else:
        numbe_raw_cuts += 1

# The three counts are part of the file name
padde_cuts_path = (f'{xom_data_path}padde_cuts/padde_cuts-{numbe_raw_cuts}-'
                   f'{numbe_plane_cuts}-{numbe_plane_moon_cuts}.pkl')

with open(padde_cuts_path, 'wb') as f:
    pkl.dump(padde_cuts, f)
print(f'Padded Cuts Path: {padde_cuts_path}')

In [18]:
# Final notification (verbosity 1 and above), including where the padded
# cuts were saved
if verbo >= 1:
    utils.send_task_comple_email(f'Injection and Cutting, {padde_cuts_path}')

### Formatting