In [1]:
# Standard libraries
import sys
import math
import random
import time
import os

# Third-party imports
import numpy as np
import pandas as pd
import pickle as pkl

import tensorflow as tf

import multiprocess
import psutil

import importlib
from yapf.yapflib.yapf_api import FormatCode

# Project imports
import ephesus
import utils
from const import *

In [2]:
# Verbosity of the e-mail progress notifications (levels are cumulative)
# 0 = off
# 1 = one e-mail when each sector has finished
# 2 = additionally one e-mail after each processing phase
# 3 = percent injection (not referenced in this part of the notebook)
verbo = 1

In [None]:
# ---- Parallelism ----
# Size of the multiprocess pool that maps utils.retur_fits_data over the
# sector files
proces_number = 20

# ---- Gap handling and interpolation ----
# Both values are handed to utils.inser_inter_spot; max_time_gap is also
# handed to utils.cut_curve
caden = 2  # mins
max_time_gap = 10  # mins
# Passed to utils.inter_curve and stored on every cut as 'inter_type'
spln_type = 'cubic'

# ---- Cut lengths ----
# min_lengt and stand_lengt are passed to utils.cut_curve; stand_lengt is
# also the length every saved cut is padded to
min_lengt = 600
stand_lengt = 1901

# ---- Catalogues ----
# When True, utils.remov_TOI is applied to the curves after TOIs are marked
remov_TOI = False
# Table looked up by its `tici` column to attach stellar parameters
TIC8 = pd.read_csv(f'{tess_metad_path}nomi_miss_2min_info.csv')

In [3]:
# Raw formatting and cutting, one sector at a time.
# For every sector the loop:
#   1. loads the sector's light curves in a process pool,
#   2. inserts interpolation spots and interpolates every curve,
#   3. pre-pads the curves into one object array,
#   4. marks TOIs and copies stellar parameters from TIC8 into the metadata,
#   5. cuts every curve, annotates each kept cut and collects the cuts,
#   6. pads the cuts to `stand_lengt` and pickles them to disk.
# The metadata of a curve / cut is the dict stored at index [-1, 1].

# The TOI catalogue does not depend on the sector: read it once instead of
# re-parsing the CSV on every iteration.
toi_full = pd.read_csv(f'{tess_metad_path}toi.csv')
# Find TIC ID of all TOIs
toi_id = toi_full['TIC ID'].to_numpy()

# (TIC8 column, metadata key it is copied into)
stell_colum_key = (
    ('radistar', 'stell_radiu'),
    ('massstar', 'stell_mass'),
    ('tmptstar', 'stell_effec_tempe'),
    ('tmag', 'stell_magni'),
)

for curre_secto in range(24, 27):

    # Import light curves
    files = utils.retur_secto_files([curre_secto, curre_secto + 1])

    with multiprocess.Pool(proces_number) as pool:
        raw_fits_data = pool.map(utils.retur_fits_data, files)
    # Delete files for memory space
    del files

    # Without any light curve the un-bounded padding below fails on the
    # maximum of an empty list, so skip the sector explicitly instead.
    if not raw_fits_data:
        print(f'Sector {curre_secto}: no light curves found, skipping.')
        continue

    if verbo >= 2:
        utils.send_task_comple_email('Fits Data')

    # Interpolate and Format Light Curves
    # Each entry is replaced in place; no temporary outlives the loop, so
    # nothing but the list itself has to be deleted afterwards.
    for i in range(len(raw_fits_data)):
        # Insert 'inter_spot' for later interpolation, then interpolate
        raw_fits_data[i] = utils.inter_curve(
            utils.inser_inter_spot(raw_fits_data[i], max_time_gap, caden),
            spln_type)

    curve = tf.keras.preprocessing.sequence.pad_sequences(raw_fits_data,
                                                           padding='pre',
                                                           dtype=object)

    # Delete raw_fits_data to free up memory space
    del raw_fits_data

    for i in range(len(curve)):
        curve[i, -1, 1]['initi_paddi'] = utils.find_start(curve[i, :-1])

    if verbo >= 2:
        utils.send_task_comple_email('Inter and Prep')

    # Mark TOIs, EBs, and Stellar Parameters
    # Mark TOIs in light curve dataset
    utils.mark_TOI(curve, toi_id)

    # Remove the TOIs
    if remov_TOI:
        curve = utils.remov_TOI(curve)

    for i in range(len(curve)):
        curve_meta = curve[i, -1, 1]
        TIC_ID = curve_meta['tic_id']
        # Find current TIC ID in TIC8
        star_infor = TIC8[TIC8.tici == TIC_ID]
        # Make sure TIC ID is in TIC8
        if len(star_infor):
            # Gather stellar radius, mass, temperature, and magnitude;
            # missing catalogue values are left out of the metadata
            for colum, key in stell_colum_key:
                value = star_infor[colum].iloc[0]
                if pd.notna(value):
                    curve_meta[key] = value

    if verbo >= 2:
        utils.send_task_comple_email('TOI')

    list_cuts = []
    for i in range(len(curve)):
        pre_cut = np.copy(curve[i])
        # Shallow-copy the metadata so the per-cut fields do not leak back
        # into the metadata of the full curve
        pre_cut[-1, 1] = pre_cut[-1, 1].copy()
        # One list object collects the time span of every kept cut; it is
        # stored in the metadata before cutting and filled in afterwards
        cut_times = []
        pre_cut[-1, 1]['cut_times'] = cut_times
        pre_cut = utils.cut_curve(pre_cut.tolist(), max_time_gap, min_lengt,
                                  stand_lengt)
        # NOTE(review): the fields written below assume utils.cut_curve gives
        # every cut its own metadata dict; if the cuts share one dict, every
        # cut ends up with the values of the last cut -- confirm.
        # Cut number
        cut_numbe = 0
        for cut in pre_cut:
            # Remove all cuts of only [0,0]
            if not cut[-2, 0]:
                continue

            cut_meta = cut[-1, 1]
            cut_meta['plane_moon_cut_injec'] = False
            cut_meta['unmod_plane_moon_cut_injec'] = False
            cut_meta['plane_cut_injec'] = False
            if cut_meta['type_orbit_archi'] == 'planmoon':
                injec_statu = utils.retur_curve_injec_statu(cut)
                cut_meta['plane_moon_cut_injec'] = injec_statu
                cut_meta['unmod_plane_moon_cut_injec'] = injec_statu
            elif cut_meta['type_orbit_archi'] == 'plan':
                cut_meta['plane_cut_injec'] = utils.retur_curve_injec_statu(cut)
            cut_meta['cut_trans'] = (cut_meta['plane_moon_cut_injec']
                                     or cut_meta['plane_cut_injec'])
            cut_meta['cut_numbe'] = cut_numbe
            cut_meta['inter_type'] = spln_type

            # The cut is not modified in between, so its start index is
            # looked up once and reused (assumes utils.find_start is pure)
            cut_start = utils.find_start(cut)
            cut_meta['cut_start_index'] = utils.binar_searc(
                curve[i, :-1, 0], cut[cut_start, 0])

            # Assign cut to list containing the rest of the cuts
            list_cuts.append(cut.tolist())
            # Increase cut number
            cut_numbe += 1
            cut_times.append([cut[cut_start, 0], cut[-2, 0]])
        # Assign cut times to the curve
        curve[i, -1, 1]['cut_times'] = cut_times

    padde_cuts = tf.keras.preprocessing.sequence.pad_sequences(list_cuts,
                                                               maxlen=stand_lengt,
                                                               dtype=object)

    padde_cuts_path = f'{xom_data_path}forma_raw_curve/forma_curve-{curre_secto}.pkl'

    # Create the output folder if needed so a finished sector is not lost to
    # a missing directory
    os.makedirs(os.path.dirname(padde_cuts_path), exist_ok=True)
    with open(padde_cuts_path, 'wb') as f:
        pkl.dump(padde_cuts, f)

    # Clear for memory
    del curve, padde_cuts

    if verbo >= 1:
        utils.send_task_comple_email(f'Raw Formatting and Cutting -- Sector {curre_secto}')

### Formatting