In [1]:
import logging
import io
import os
import csv
import datetime
import stopit
import boto3
from glob import glob
from pathlib import Path

import lightkurve as lk
import numpy as np
import pandas as pd
import requests
import math
from astropy import units as u
import warnings
from astropy.table import QTable

In [2]:
# Logger for this notebook; INFO-level messages are appended to 'preprocess.log'.
logger = logging.getLogger(__name__)

logger.setLevel(logging.INFO)
# Attach the file handler only once. Re-running this cell in a live kernel would
# otherwise add another handler and every message would be written repeatedly.
if not any(isinstance(existing, logging.FileHandler) for existing in logger.handlers):
    handler = logging.FileHandler('preprocess.log')
    logger.addHandler(handler)

# CSV export of the ExoFOP TESS TOI table
TESS_DATA_URL = 'https://exofop.ipac.caltech.edu/tess/download_toi.php?sort=toi&output=csv'
# NOTE(review): absolute, user-specific path; this only works on the original author's machine.
LOCAL_DATA_FILE_NAME = '/Users/julianornelas/spacelab/new_world_disco/code/tess_data.csv'
DEFAULT_TESS_ID = '2016376984' # a working 'v-shaped' lightcurve. Eventually we'll need to run this for all lightcurves from tess
# Offset subtracted from the 'Epoch (BJD)' value to obtain the epoch passed to fold()
BJD_TO_BCTJD_DIFF = 2457000
OUTPUT_FOLDER = 'tess_data/' # modified to save to different output folder
subFolders = ['tic_info/', 'locGlo_flux/', 'locGlo_cent/'] #sub folders within the main output folder
# A list of all the valid authors that are supported by
# lightkurves searches and downloads
valid_authors = ["Kepler","K2", "SPOC","TESS-SPOC","QLP","TASOC","PATHOS","CDIPS","K2SFF","EVEREST","TESScut","GSFC-ELEANOR-LITE"]

# these bin numbers for TESS from Yu et al. (2019) section 2.3: https://iopscience.iop.org/article/10.3847/1538-3881/ab21d6/pdf
global_bin_width_factor = 201
local_bin_width_factor = 61

In [3]:
def fetch_tess_data_df():
    """
    Load the TESS TOI table as a pandas DataFrame.

    If LOCAL_DATA_FILE_NAME already exists it is read and returned.
    Otherwise the CSV is downloaded from TESS_DATA_URL, saved to
    LOCAL_DATA_FILE_NAME so later calls can reuse it, and then loaded.

    Note: an existing local file is never refreshed; delete it to pick
    up newer TESS data.

    Output: pandas DataFrame with the contents of the CSV.
    Raises: requests.HTTPError if the download returns an error status.
    """

    if not os.path.isfile(LOCAL_DATA_FILE_NAME):
        # Previously this path fell through and returned None, which made
        # every caller fail; download and cache the table instead.
        response = requests.get(TESS_DATA_URL, timeout=120)
        response.raise_for_status()

        cache_dir = os.path.dirname(LOCAL_DATA_FILE_NAME)
        if cache_dir:
            os.makedirs(cache_dir, exist_ok=True)
        with open(LOCAL_DATA_FILE_NAME, 'wb') as cache_file:
            cache_file.write(response.content)

    return pd.read_csv(LOCAL_DATA_FILE_NAME)


In [4]:
def export_lightcurve(lc, filename, base_dir="/home/ubuntu/spaceLab/"):
    """
    Method to save a lightcurve's flux as a NumPy array file (.npy).

    The output tree <base_dir>/<OUTPUT_FOLDER>/<subfolder> is created for every
    entry of subFolders if it does not exist yet. The flux is written to
    <base_dir>/<OUTPUT_FOLDER>/<subFolders[1]>/<filename>_flux.npy.
    (CSV export is currently disabled.)

    Inputs: lc = lightcurve to be saved; only lc['flux'] is read.
            filename = name of the file, without suffix or extension.
            base_dir = root directory under which OUTPUT_FOLDER is created.
    """
    output_root = os.path.join(base_dir, OUTPUT_FOLDER)

    # makedirs(exist_ok=True) creates missing parents and tolerates existing
    # folders; the previous os.mkdir call failed when base_dir was missing.
    os.makedirs(output_root, exist_ok=True)
    for subfolder in subFolders:
        os.makedirs(os.path.join(output_root, subfolder), exist_ok=True)

    flux_path = os.path.join(output_root, subFolders[1], f"{filename}_flux.npy")
    np.save(flux_path, np.array(lc['flux']))

In [5]:
def normalize_centroid(centroid_data):
    """
    Normalize centroid data in place: subtract the median, then divide by the
    standard deviation.

    NaN entries are ignored when computing the median and standard deviation
    and remain NaN in the result, so one missing value no longer turns the
    whole array into NaN.

    Input: centroid_data = float numpy array; it is modified in place.
    Output: None. If the standard deviation is 0 the data is only
            median-subtracted, a message is logged, and the division is skipped.
    """
    med = np.nanmedian(centroid_data)
    std = np.nanstd(centroid_data)
    centroid_data -= med
    if std == 0:
        # Dividing would produce inf/NaN; leave the centered data as is.
        logger.info("Error; normalize_centroid(): std == 0")
        return
    centroid_data /= std

In [6]:
def get_mag(x, y):
    """Return the Euclidean magnitude sqrt(x^2 + y^2) of the vector (x, y)."""
    sum_of_squares = x*x + y*y
    return math.sqrt(sum_of_squares)

In [7]:
def preprocess_centroid(lc_local, lc_global):
    """
    Build normalized centroid-distance arrays for the local and global views.

    Inputs: lc_local = local lightcurve object (already pre-processed).
            lc_global = global lightcurve object (already pre-processed).
    Output: tuple (local_cen, global_cen) of numpy arrays. Each entry is
            sqrt(sap_x^2 + sap_y^2) for one row, after normalize_centroid()
            has been applied to the array.
            Returns None (after logging a message) when either lightcurve
            lacks the 'sap_x' / 'sap_y' columns, so callers that unpack the
            result must handle that case.
    """
    def _has_sap_columns(lc):
        return all(name in lc.columns for name in ('sap_x', 'sap_y'))

    def _to_float_array(column):
        # remove the pix dimension
        return np.array([float(value*u.pix/u.pix) for value in column])

    sap_global_condition = _has_sap_columns(lc_global)
    sap_local_condition = _has_sap_columns(lc_local)

    if not (sap_global_condition and sap_local_condition):
        # TODO: fall back to the 'centroid_row' / 'centroid_col' columns when
        # 'sap_x' / 'sap_y' are not available, and preprocess those instead.
        logger.info("Error: preprocess_centroid(): No handling for centroid data not stored in sap_x, sap_y or centroid_row, centroid_col")
        return

    global_x = _to_float_array(lc_global['sap_x'])
    global_y = _to_float_array(lc_global['sap_y'])
    local_x = _to_float_array(lc_local['sap_x'])
    local_y = _to_float_array(lc_local['sap_y'])

    # r = sqrt(x^2 + y^2) for each centroid location
    local_cen = np.array(list(map(get_mag, local_x, local_y)))
    global_cen = np.array(list(map(get_mag, global_x, global_y)))

    # in-place normalization: subtract the median, divide by the standard deviation
    normalize_centroid(local_cen)
    normalize_centroid(global_cen)

    return local_cen, global_cen


In [8]:
# Load the TESS table through fetch_tess_data_df() and print its (rows, columns) shape.
tess_data = fetch_tess_data_df()
print(tess_data.shape)
# Timing note: ~20 seconds to fetch the data from the site vs ~63 milliseconds to read in the file if it already exists

(6586, 62)


In [9]:
# Choose which row of tess_data to process; select_index is the row position.
select_index = 0
if not 0 <= select_index < len(tess_data):
    # The previous row-by-row scan left tess_id undefined in this case.
    raise IndexError(f"select_index {select_index} is outside tess_data (rows: {len(tess_data)})")

# Direct positional lookup instead of iterating over every row with iterrows().
# tess_data comes from pd.read_csv, so row labels and positions coincide.
index = tess_data.index[select_index]
tess_id = str(tess_data['TIC ID'].iloc[select_index])
print(index, str(tess_id))

0 231663901


In [10]:

# Search for all available lightcurves of the selected target.
# (This cell only searches; download and stitching happen in later cells.)

id_string = f'TIC {tess_id}'

print("Loading lightcurves")

# q is the search result returned by lightkurve for "TIC <id>"
q = lk.search_lightcurve(id_string)
q


Loading lightcurves


#,mission,year,author,exptime,target_name,distance
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,s,Unnamed: 5_level_1,arcsec
0,TESS Sector 01,2018,SPOC,120,231663901,0.0
1,TESS Sector 01,2018,TESS-SPOC,1800,231663901,0.0
2,TESS Sector 01,2018,QLP,1800,231663901,0.0
3,TESS Sector 01,2018,TASOC,120,231663901,0.0
4,TESS Sector 01,2018,GSFC-ELEANOR-LITE,1800,231663901,0.0
5,TESS Sector 01,2018,TASOC,1800,231663901,0.0
6,TESS Sector 01,2018,TASOC,1800,231663901,0.0
7,TESS Sector 01,2018,TGLC,1800,231663901,0.0
8,TESS Sector 27,2020,SPOC,20,231663901,0.0
9,TESS Sector 27,2020,SPOC,120,231663901,0.0


In [11]:
# The 'author' column of the search result table:
# one entry per search result row for the selected target.
authors_column = q.table['author']
authors_column

0
SPOC
TESS-SPOC
QLP
TASOC
GSFC-ELEANOR-LITE
TASOC
TASOC
TGLC
SPOC
SPOC


In [12]:
# Boolean mask with one entry per search result row: True only when every
# comma-separated author listed for that row is one of valid_authors.
supported_authors = frozenset(valid_authors)
valid_authors_mask = [set(row_authors.split(',')) <= supported_authors
                      for row_authors in authors_column]
valid_authors_mask



[True, True, True, True, True, True, True, False, True, True, True, True]

In [13]:
# Keep only the search result rows whose mask entry is True, i.e. rows where
# all authors are valid. This removes rows that have unsupported authors.
search_result = q[valid_authors_mask]
search_result



#,mission,year,author,exptime,target_name,distance
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,s,Unnamed: 5_level_1,arcsec
0,TESS Sector 01,2018,SPOC,120,231663901,0.0
1,TESS Sector 01,2018,TESS-SPOC,1800,231663901,0.0
2,TESS Sector 01,2018,QLP,1800,231663901,0.0
3,TESS Sector 01,2018,TASOC,120,231663901,0.0
4,TESS Sector 01,2018,GSFC-ELEANOR-LITE,1800,231663901,0.0
5,TESS Sector 01,2018,TASOC,1800,231663901,0.0
6,TESS Sector 01,2018,TASOC,1800,231663901,0.0
7,TESS Sector 27,2020,SPOC,20,231663901,0.0
8,TESS Sector 27,2020,SPOC,120,231663901,0.0
9,TESS Sector 27,2020,TESS-SPOC,600,231663901,0.0


In [14]:
# Download every lightcurve in the filtered search result.
# The displayed result is a LightCurveCollection (see the output below).
lcs = search_result.download_all()
lcs

LightCurveCollection of 11 objects:
    0: <TessLightCurve LABEL="TIC 231663901" SECTOR=1 AUTHOR=SPOC FLUX_ORIGIN=pdcsap_flux>
    1: <TessLightCurve LABEL="TIC 231663901" SECTOR=1 AUTHOR=SPOC FLUX_ORIGIN=pdcsap_flux>
    2: <TessLightCurve LABEL="TIC 231663901" SECTOR=1 AUTHOR=QLP FLUX_ORIGIN=sap_flux>
    3: <TessLightCurve LABEL="TIC 231663901" SECTOR=1 AUTHOR=TASOC FLUX_ORIGIN=flux_raw>
    4: <TessLightCurve LABEL="TIC 231663901" SECTOR=1 AUTHOR=GSFC-ELEANOR-LITE FLUX_ORIGIN=corr_flux>
    5: <TessLightCurve LABEL="TIC 231663901" SECTOR=1 AUTHOR=TASOC FLUX_ORIGIN=flux_raw>
    6: <TessLightCurve LABEL="TIC 231663901" SECTOR=1 AUTHOR=TASOC FLUX_ORIGIN=flux_raw>
    7: <TessLightCurve LABEL="TIC 231663901" SECTOR=27 AUTHOR=SPOC FLUX_ORIGIN=pdcsap_flux>
    8: <TessLightCurve LABEL="TIC 231663901" SECTOR=27 AUTHOR=SPOC FLUX_ORIGIN=pdcsap_flux>
    9: <TessLightCurve LABEL="TIC 231663901" SECTOR=27 AUTHOR=SPOC FLUX_ORIGIN=pdcsap_flux>
    10: <TessLightCurve LABEL="TIC 231663901" SECT

In [15]:

print("Converting lightcurves to dataframes")
# One pandas DataFrame per downloaded lightcurve, in collection order
list_of_lcs_dfs = [lcs[position].to_pandas() for position in range(len(lcs))]



Converting lightcurves to dataframes


In [16]:
# Stack the per-lightcurve dataframes into one frame (outer join on columns).
# A single pd.concat call handles 1, 2 or many frames alike and avoids
# re-copying the growing frame on every loop iteration.
if not list_of_lcs_dfs:
    raise ValueError("No lightcurve dataframes to concatenate")
new_df = pd.concat(list_of_lcs_dfs, axis=0, join="outer")

# Sort by the time index. Not done in place, so the frames held in
# list_of_lcs_dfs are left untouched.
new_df = new_df.sort_index()
new_df = new_df[['sap_bkg_err', 'sap_bkg', 'sap_flux', 'sap_x', 'sap_y', 'centroid_row', 'centroid_col']]

#Note for potential optimizing:
#Filter new_df columns to include only the following:
#[time, flux, flux_err, cadenceno, quality, sap_bkg_err, sap_bkg, sap_flux, sap_x, sap_y, centroid_row, centroid_col]



In [17]:
# Round-trip the concatenated dataframe back into a lightkurve object.
# index=True keeps the dataframe index as a column of the table.
q_table = QTable.from_pandas(new_df, index=True) #Convert the dataframe into a QTable
lc_temp = lk.LightCurve(data=q_table) #Convert the QTable into a LightCurve object

###### END JULIAN'S CODE ######
#################################



In [18]:
# Stitch the downloaded lightcurves into a single lightcurve.
# stitch eliminates sap and centroid columns, so they are re-attached here:
# convert lc_raw and lc_temp to dataframes, join them on the index, and
# convert the result back to a LightCurve object.
lc_raw = lcs.stitch()
#print(lc_raw)
lc_raw_df = lc_raw.to_pandas()
# 'flux' and 'flux_err' are dropped from the lc_temp side before joining
lc_temp_df = lc_temp.to_pandas().drop(columns=['flux', 'flux_err'])
joined_df = lc_raw_df.join(lc_temp_df, how='outer').sort_index()
q_table_2 = QTable.from_pandas(joined_df, index=True)
lc_concat = lk.LightCurve(data=q_table_2)



In [19]:

print("Extracting stellar parameters")

# All rows of the TESS table that belong to the selected TIC id
threshold_crossing_events = tess_data[tess_data['TIC ID'] == int(tess_id)]
threshold_crossing_events



Extracting stellar parameters


Unnamed: 0,TIC ID,TOI,Previous CTOI,Master,SG1A,SG1B,SG2,SG3,SG4,SG5,...,Stellar Radius (R_Sun) err,Stellar Metallicity,Stellar Metallicity err,Stellar Mass (M_Sun),Stellar Mass (M_Sun) err,Sectors,Date TOI Alerted (UTC),Date TOI Updated (UTC),Date Modified,Comments
0,231663901,101.01,,5,5,5,5,5,5,5,...,0.043847,,,1.05,0.129454,127,2018-09-05,2021-10-07,2022-12-14 12:09:24,WASP-46 b


In [20]:
# Number of table rows (threshold crossing events) found for this target
tce_count = threshold_crossing_events.shape[0]
tce_count



1

In [21]:
# The ExoFOP planet-parameter table depends only on tess_id, so it is fetched
# once up front (not once per TCE) and parsed in memory instead of being
# written to, and deleted from, a machine-specific temp file.
stellar_params_link = f'https://exofop.ipac.caltech.edu/tess/download_planet.php?id={tess_id}&output=csv'
res = requests.get(stellar_params_link, timeout=120)
res.raise_for_status()
densities = pd.read_csv(io.BytesIO(res.content), sep='|')['Fitted Stellar Density (g/cm3)']

# NOTE: period, duration, t0 and info are overwritten on every iteration, so
# after the loop they describe the LAST threshold crossing event of the target.
for i in range(tce_count):
    # Read every value from row i. The previous whole-column .item() calls
    # raised ValueError as soon as a target had more than one TCE.
    tce = threshold_crossing_events.iloc[i]

    period, duration = float(tce['Period (days)']), float(tce['Duration (hours)'])
    epoch_bjd = float(tce['Epoch (BJD)'])
    t0 = epoch_bjd - BJD_TO_BCTJD_DIFF

    # info contains: [0]tic, [1]tce, [2]period, [3]epoch, [4]duration, [5]label,
    # [6]Teff, [7]logg, [8]metallicity, [9]mass, [10]radius, [11]density
    info = np.full((12,), np.nan)

    info[0] = tess_id
    info[1] = i + 1
    info[2] = period
    info[3] = epoch_bjd  # stored as BJD (no offset applied), unlike t0
    info[4] = duration

    # if label is -1, these are unknowns for the experimental set
    disposition = tce['TFOPWG Disposition']
    if disposition in ['KP', 'CP']:
        info[5] = 1
    elif disposition in ['FA', 'FP']:
        info[5] = 0
    else:
        info[5] = -1

    info[6] = tce['Stellar Eff Temp (K)']
    info[7] = tce['Stellar log(g) (cm/s^2)']
    info[8] = tce['Stellar Metallicity']
    info[9] = tce['Stellar Mass (M_Sun)']
    info[10] = tce['Stellar Radius (R_Sun)']

    # Use the first non-missing fitted stellar density, if there is one
    if not np.all(densities.isna()):
        info[11] = densities.dropna().iloc[0]
info

array([2.31663901e+08, 1.00000000e+00, 1.43036914e+00, 2.45903690e+06,
       1.64387283e+00, 1.00000000e+00, 5.60000000e+03, 4.48851000e+00,
                  nan, 1.05000000e+00, 8.90774012e-01, 1.40000000e-01])

In [22]:

print("Processing outliers")

# Outlier removal on the joined lightcurve with a sigma threshold of 3
lc_clean = lc_concat.remove_outliers(sigma=3)
lc_clean


Processing outliers


time,flux,flux_err,cadenceno,quality,sap_bkg_err,sap_bkg,sap_flux,sap_x,sap_y,centroid_row,centroid_col
Time,float64,float64,int64,int32,float32,float32,float32,float32,float32,float64,float64
1325.2989581680333,0.9920082358629023,0.0036284190449914393,70444,1,--,--,--,--,--,1517.9420962774022,1849.9081364080841
1325.3003470508313,0.9970574640240786,0.003633144713511413,70445,0,--,--,--,--,--,1517.872242176807,1849.8578748525895
1325.3003471613704,1.0001670122146606,0.004209726583212614,70445,0,1.4734054,363.20078,1217.2936,--,--,1518.5597399748099,1849.624121381664
1325.3017359331632,1.003645817947945,0.003635765020326321,70446,0,--,--,--,--,--,1517.8183558820342,1849.8394513483438
1325.301736044114,1.0020424127578735,0.00422222726047039,70446,0,1.4699463,361.8014,1232.5476,--,--,1518.545833920395,1849.6127946413612
1325.3031248159598,1.0016840922558774,0.00363398488711168,70447,0,--,--,--,--,--,1517.845480042903,1849.8367590213109
1325.303124926392,1.0033267736434937,0.004230615217238665,70447,0,1.4749018,362.3199,1237.3829,--,--,1518.5462079937986,1849.6087200503089
1325.304513698292,0.9985911233200208,0.0036311333888319874,70448,0,--,--,--,--,--,1517.827460795185,1849.8285685465912
1325.3045138091354,0.9973147511482239,0.004228027071803808,70448,0,1.4729211,364.01382,1231.4357,--,--,1518.5426679829093,1849.6088174025367
...,...,...,...,...,...,...,...,...,...,...,...


In [23]:
# Do the hacky masking from here: https://docs.lightkurve.org/tutorials/3-science-examples/exoplanets-machine-learning-preprocessing.html
# Temporary fold at the TCE period and epoch; it is used in the following cells
# to find the in-transit cadences that are masked out when flattening.
temp_fold = lc_clean.fold(period, epoch_time=t0)
temp_fold


time,flux,flux_err,time_original,cadenceno,quality,sap_bkg_err,sap_bkg,sap_flux,sap_x,sap_y,centroid_row,centroid_col
TimeDelta,float64,float64,Time,int64,int32,float32,float32,float32,float32,float32,float64,float64
-0.7151776396311,0.9945590496063232,0.0028195392806082964,2037.619295500666,116662,0,266.77,-4160.75,0.99456024,1238.5465,1413.1484,--,--
-0.7151642242994893,0.99570232629776,0.009874920360744,2043.3407854773568,3524609,0,5.3628235,827.3658,1433.7543,--,--,1413.7575666218015,1238.8245342843609
-0.7151618096895263,1.0043126344680786,0.009651809930801392,2053.353371874345,3567863,0,5.126369,734.7216,1433.0105,--,--,1413.7244401928374,1238.8170872813007
-0.7151412663366987,0.9889028668403625,0.009856383316218853,2041.9104392949798,3518430,0,5.3577113,820.6362,1430.3433,--,--,1413.7751246471232,1238.8193023704573
-0.7151404105237874,1.0091885328292847,0.004300144966691732,1331.016977401927,74561,0,1.5338278,415.44968,1236.9125,--,--,1518.536952573893,1849.6176776830584
-0.715136393722452,1.0006095110424205,0.003742436269407665,1331.0169814187284,74561,0,--,--,--,--,--,1517.8133859761408,1849.849943474713
-0.7151265435894003,1.0018806457519531,0.004994705785065889,2047.6319305790862,590524,0,2.8947306,1327.0607,1456.2799,--,--,1413.7682305194744,1238.8243281341006
-0.7151218376571828,0.9840744733810425,0.010701490566134453,2059.0748884077366,3592580,0,6.1813736,1199.2513,1414.0239,--,--,1413.7448262415767,1238.8358386522737
-0.7151202767237148,0.9960549473762512,0.00966646522283554,2040.480091144253,3512251,0,5.1379404,738.1076,1435.0116,--,--,1413.774879814998,1238.8274610051008
...,...,...,...,...,...,...,...,...,...,...,...,...


In [24]:
# Transit duration as a fraction of the period: duration is in hours
# ('Duration (hours)'), period in days ('Period (days)'), hence the / 24.0.
fractional_duration = (duration / 24.0) / period
fractional_duration


0.04788603122771668

In [25]:
# True for folded cadences whose |phase| is below 1.5x the fractional duration.
# NOTE(review): the folded 'time' column is displayed as a TimeDelta while
# fractional_duration is a fraction of the period - confirm the units match.
phase_mask = np.abs(temp_fold.phase.value) < (fractional_duration * 1.5)
phase_mask


array([False, False, False, ..., False, False, False])

In [26]:
# Map the masked cadences of the folded lightcurve back onto lc_clean: True
# where a timestamp of lc_clean is one of the selected original timestamps.
# np.isin replaces np.in1d, which NumPy has deprecated.
transit_mask = np.isin(lc_clean.time.value, temp_fold.time_original.value[phase_mask])
transit_mask

array([False, False, False, ..., False, False, False])

In [27]:

print("Flattening lightcurve")

# Flatten the cleaned lightcurve, passing the transit mask computed above
lc_flat = lc_clean.flatten(mask=transit_mask)
lc_flat


Flattening lightcurve


time,flux,flux_err,cadenceno,quality,sap_bkg_err,sap_bkg,sap_flux,sap_x,sap_y,centroid_row,centroid_col
Time,float64,float64,int64,int32,float32,float32,float32,float32,float32,float64,float64
1325.2989581680333,1.0,0.003657650122062993,70444,1,--,--,--,--,--,1517.9420962774022,1849.9081364080841
1325.3003470508313,0.9984430652500123,0.0036381936599870946,70445,0,--,--,--,--,--,1517.872242176807,1849.8578748525895
1325.3003471613704,1.0015569347499877,0.0042155768000005246,70445,0,1.4734054,363.20078,1217.2936,--,--,1518.5597399748099,1849.624121381664
1325.3017359331632,1.000799428926951,0.0036254538114798276,70446,0,--,--,--,--,--,1517.8183558820342,1849.8394513483438
1325.301736044114,0.9992005710730489,0.004210252815797352,70446,0,1.4699463,361.8014,1232.5476,--,--,1518.545833920395,1849.6127946413612
1325.3031248159598,0.9991807119773989,0.003624902935856772,70447,0,--,--,--,--,--,1517.845480042903,1849.8367590213109
1325.303124926392,1.000819288022601,0.004220042184500554,70447,0,1.4749018,362.3199,1237.3829,--,--,1518.5462079937986,1849.6087200503089
1325.304513698292,1.0006394951726554,0.0036385817941433788,70448,0,--,--,--,--,--,1517.827460795185,1849.8285685465912
1325.3045138091354,0.9993605048273447,0.004236699862342208,70448,0,1.4729211,364.01382,1231.4357,--,--,1518.5426679829093,1849.6088174025367
...,...,...,...,...,...,...,...,...,...,...,...


In [28]:
# Fold the flattened lightcurve at the TCE period, using t0 as the epoch
lc_fold = lc_flat.fold(period, epoch_time=t0)
lc_fold

time,flux,flux_err,time_original,cadenceno,quality,sap_bkg_err,sap_bkg,sap_flux,sap_x,sap_y,centroid_row,centroid_col
TimeDelta,float64,float64,Time,int64,int32,float32,float32,float32,float32,float32,float64,float64
-0.7151776396311,0.9951949382972965,0.0028213420022697236,2037.619295500666,116662,0,266.77,-4160.75,0.99456024,1238.5465,1413.1484,--,--
-0.7151642242994893,0.9961256423910052,0.009879118616184217,2043.3407854773568,3524609,0,5.3628235,827.3658,1433.7543,--,--,1413.7575666218015,1238.8245342843609
-0.7151618096895263,1.0047974379569462,0.009656469068770733,2053.353371874345,3567863,0,5.126369,734.7216,1433.0105,--,--,1413.7244401928374,1238.8170872813007
-0.7151412663366987,0.9904579506074024,0.009871882817950248,2041.9104392949798,3518430,0,5.3577113,820.6362,1430.3433,--,--,1413.7751246471232,1238.8193023704573
-0.7151404105237874,1.0042685989336209,0.004279181164300337,1331.016977401927,74561,0,1.5338278,415.44968,1236.9125,--,--,1518.536952573893,1849.6176776830584
-0.715136393722452,0.9957314010663789,0.0037241913741722817,1331.0169814187284,74561,0,--,--,--,--,--,1517.8133859761408,1849.849943474713
-0.7151265435894003,1.00316655262277,0.005001116455352773,2047.6319305790862,590524,0,2.8947306,1327.0607,1456.2799,--,--,1413.7682305194744,1238.8243281341006
-0.7151218376571828,0.9853170811576479,0.01071500352247917,2059.0748884077366,3592580,0,6.1813736,1199.2513,1414.0239,--,--,1413.7448262415767,1238.8358386522737
-0.7151202767237148,0.9942079925239955,0.009648540985930232,2040.480091144253,3512251,0,5.1379404,738.1076,1435.0116,--,--,1413.774879814998,1238.8274610051008
...,...,...,...,...,...,...,...,...,...,...,...,...


In [29]:

print("Creating global representation")
print(period)
print(global_bin_width_factor)
# NOTE(review): this re-assigns the constant defined near the top of the
# notebook (201) with the float 201.00 - appears redundant; confirm and remove.
global_bin_width_factor = 201.00

# Bin the folded lightcurve into bins of width period / global_bin_width_factor.
# The normalize() / "- 1" step is commented out here, unlike in the local view.
lc_global = lc_fold.bin(time_bin_size=period/global_bin_width_factor) #.normalize() #- 1
lc_global


Creating global representation
1.43036914033977
201


time,flux,flux_err,time_bin_start,time_bin_size,cadenceno,quality,sap_bkg_err,sap_bkg,sap_flux,sap_x,sap_y,centroid_row,centroid_col
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,d,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
TimeDelta,float64,float64,TimeDelta,float64,int64,int32,float32,float32,float32,float32,float32,float64,float64
-0.7116195074412001,1.0001734936882478,0.0002741582491221615,-0.7151776396311,0.007116264379799851,2037834,11,16.403736,733.5432,1356.2579,1432.8849,1446.4686,1433.4469567813699,1404.7861340654729
-0.7045032430614002,1.0000819399870358,0.0002742846817696551,-0.7080613752513001,0.007116264379799851,2050698,11,15.5146675,742.8562,1358.7572,1413.0526,1443.0642,1433.204007858749,1402.726119656331
-0.6973869786816004,1.0000385220594237,0.0002706237339286997,-0.7009451108715004,0.007116264379799851,2022060,10,15.418104,744.2184,1358.925,1413.0521,1443.0651,1434.7536399482854,1403.1923438657925
-0.6902707143018005,1.000240023692335,0.0002692355054932945,-0.6938288464917004,0.007116264379799851,2012545,15,16.254627,752.12085,1357.541,1432.8848,1446.4679,1435.5465970561177,1407.6332603320732
-0.6831544499220006,0.9998189360953226,0.00026836316922304376,-0.6867125821119006,0.007116264379799851,2012064,21,14.856899,814.2632,1356.3212,1460.6455,1451.2404,1430.8816022822775,1404.8611170563809
-0.6760381855422009,1.0000221036269152,0.0002800914222989603,-0.6795963177321007,0.007116264379799851,2086588,19,15.069643,747.04736,1362.8954,1399.276,1440.6963,1435.5552294897527,1400.8714164399444
-0.6689219211624009,1.0001994948205934,0.00027819609941804924,-0.6724800533523009,0.007116264379799851,2079149,14,15.8393955,763.88654,1358.5402,1413.0514,1443.065,1434.7024391629686,1404.113743549568
-0.6618056567826012,0.9998592315740938,0.0002699170812642491,-0.665363788972501,0.007116264379799851,2018138,16,16.218403,679.0539,1357.8325,1432.8827,1446.4657,1433.2910596507168,1402.8182494636314
-0.6546893924028012,1.0001908852850974,0.00027295438532818955,-0.6582475245927012,0.007116264379799851,2039174,14,16.183176,748.33636,1361.8842,1421.7781,1444.5602,1434.88025595431,1404.2138004972528
...,...,...,...,...,...,...,...,...,...,...,...,...,...


In [30]:
# Log (but do not stop) when the number of global bins is not the expected one
if len(lc_global) != global_bin_width_factor:
    logger.info(f'{tess_id} lc_global incorrect dimension: {len(lc_global)}')

# Scale by the absolute value of the smallest (NaN-ignoring) flux, times 2, plus 1.
# NOTE(review): lc_global was binned without the ".normalize() - 1" step that
# the local view uses, so the flux here is still near 1 and the result lands
# near 3 rather than near 1 - confirm this is intended.
global_min_flux_abs = np.abs(np.nanmin(lc_global.flux))
lc_global = (lc_global / global_min_flux_abs) * 2.0 + 1
lc_global

time,flux,flux_err,time_bin_start,time_bin_size,cadenceno,quality,sap_bkg_err,sap_bkg,sap_flux,sap_x,sap_y,centroid_row,centroid_col
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,d,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
TimeDelta,float64,float64,TimeDelta,float64,int64,int32,float32,float32,float32,float32,float32,float64,float64
-0.7116195074412001,3.0314425356636003,0.0005568401205235428,-0.7151776396311,0.007116264379799851,2037834,11,16.403736,733.5432,1356.2579,1432.8849,1446.4686,1433.4469567813699,1404.7861340654729
-0.7045032430614002,3.031256581842475,0.0005570969166290544,-0.7080613752513001,0.007116264379799851,2050698,11,15.5146675,742.8562,1358.7572,1413.0526,1443.0642,1433.204007858749,1402.726119656331
-0.6973869786816004,3.03116839611718,0.0005496612015137319,-0.7009451108715004,0.007116264379799851,2022060,10,15.418104,744.2184,1358.925,1413.0521,1443.0651,1434.7536399482854,1403.1923438657925
-0.6902707143018005,3.03157766409987,0.0005468415844066035,-0.6938288464917004,0.007116264379799851,2012545,15,16.254627,752.12085,1357.541,1432.8848,1446.4679,1435.5465970561177,1407.6332603320732
-0.6831544499220006,3.030722397227463,0.000545069790796822,-0.6867125821119006,0.007116264379799851,2012064,21,14.856899,814.2632,1356.3212,1460.6455,1451.2404,1430.8816022822775,1404.8611170563809
-0.6760381855422009,3.0311350488005626,0.0005688909301469423,-0.6795963177321007,0.007116264379799851,2086588,19,15.069643,747.04736,1362.8954,1399.276,1440.6963,1435.5552294897527,1400.8714164399444
-0.6689219211624009,3.0314953463075094,0.0005650413585042255,-0.6724800533523009,0.007116264379799851,2079149,14,15.8393955,763.88654,1358.5402,1413.0514,1443.065,1434.7024391629686,1404.113743549568
-0.6618056567826012,3.03080424097766,0.0005482259262444271,-0.665363788972501,0.007116264379799851,2018138,16,16.218403,679.0539,1357.8325,1432.8827,1446.4657,1433.2910596507168,1402.8182494636314
-0.6546893924028012,3.03147785956473,0.0005543949646244381,-0.6582475245927012,0.007116264379799851,2039174,14,16.183176,748.33636,1361.8842,1421.7781,1444.5602,1434.88025595431,1404.2138004972528
...,...,...,...,...,...,...,...,...,...,...,...,...,...


In [31]:

print("Creating local representation")

# Select folded cadences within 4x the fractional duration on either side of
# phase 0. Note: this overwrites the phase_mask used earlier for flattening.
# NOTE(review): same unit question as the earlier mask (phase vs fraction of period).
phase_mask = (lc_fold.phase > -4*fractional_duration) & (lc_fold.phase < 4.0*fractional_duration)
phase_mask


Creating local representation




array([False, False, False, ..., False, False, False])

In [32]:
# Keep only the folded cadences selected by phase_mask (the window around phase 0)
lc_zoom = lc_fold[phase_mask]
lc_zoom



time,flux,flux_err,time_original,cadenceno,quality,sap_bkg_err,sap_bkg,sap_flux,sap_x,sap_y,centroid_row,centroid_col
TimeDelta,float64,float64,Time,int64,int32,float32,float32,float32,float32,float32,float64,float64
-0.1915339235346367,0.997689333185068,0.004265620452348926,1331.5405838889162,74938,0,1.5248067,406.3576,1229.448,--,--,1518.5334486043982,1849.6202509772875
-0.19152899733569845,1.002310666814932,0.00371114815507846,1331.5405888151151,74938,0,--,--,--,--,--,1517.7966275582723,1849.8461785874945
-0.1915218377504542,1.003069599393234,0.00978044926052971,2042.434058723566,3520692,0,5.2541466,778.3363,1438.6804,--,--,1413.769207265155,1238.8303300645393
-0.19151298888401733,1.0055790887890146,0.010981454366280053,2059.5984972565097,3594842,0,6.4177012,1314.4968,1455.0475,--,--,1413.7628622389652,1238.830879411116
-0.19150676834071528,1.0118808649544886,0.009554043057184864,2052.446657775354,3563946,0,5.063479,709.27374,1434.0446,--,--,1413.7320155037994,1238.81243542605
-0.19150011937755632,1.0120348373352492,0.009781435094283322,2041.0037113015992,3514513,0,5.2413597,775.53033,1451.0309,--,--,1413.756750155503,1238.8235888659328
-0.1914804033472837,0.975538145634521,0.009560458994086453,2039.5733618772897,3508334,0,5.1197524,728.004,1405.9664,--,--,1413.7771804460428,1238.8354024012658
-0.19147317682906348,0.9968960370644032,0.0036984243965635854,1344.4139668986797,84207,0,--,--,--,--,--,1517.7841314229527,1849.8862598777719
-0.19147313645312544,1.003103962935597,0.004243210540076147,1344.4139669390556,84207,0,1.5200578,401.47598,1215.4393,--,--,1518.514494048038,1849.639751624234
...,...,...,...,...,...,...,...,...,...,...,...,...


In [33]:
# we use 8x fractional duration here since we zoomed in on 4x the fractional duration on both sides
# The binned lightcurve is normalized and then shifted down by 1.
lc_local = lc_zoom.bin(time_bin_size=8*fractional_duration/local_bin_width_factor).normalize() - 1
lc_local


time,flux,flux_err,time_bin_start,time_bin_size,cadenceno,quality,sap_bkg_err,sap_bkg,sap_flux,sap_x,sap_y,centroid_row,centroid_col
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,d,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
TimeDelta,float64,float64,TimeDelta,float64,int64,int32,float32,float32,float32,float32,float32,float64,float64
-0.18839385591314708,0.00010629233581083142,0.0002871506150923601,-0.1915339235346367,0.006280135242979237,2105034,12,15.115054,662.90546,1363.1324,1442.1395,1448.0624,1430.8702522237459,1393.6117419238399
-0.18211372067016784,0.00018433225288227995,0.0002841657320043038,-0.18525378829165745,0.006280135242979237,2066715,13,13.947191,797.16614,1366.3137,1454.1147,1450.1226,1431.0813371034417,1394.6331935907024
-0.1758335854271886,9.71131206444209e-05,0.0002855808040920419,-0.1789736530486782,0.006280135242979237,2097547,9,15.818757,677.93744,1362.1885,1431.4188,1446.2148,1436.664079569808,1400.0838173319125
-0.16955345018420936,0.00027426320064849463,0.00028842675580461875,-0.17269351780569897,0.006280135242979237,2105469,11,12.752973,773.4305,1370.4257,1442.1367,1448.0579,1432.3543222115802,1393.380081391235
-0.16327331494123015,-0.00017480363216926254,0.0002882675171109812,-0.16641338256271976,0.006280135242979237,2073796,2,15.885529,726.6768,1360.5597,1408.2018,1442.2261,1433.3581825198323,1399.9380152603358
-0.15699317969825088,-8.540358712638696e-05,0.00028693875132631365,-0.1601332473197405,0.006280135242979237,2097171,12,13.586003,796.4646,1361.1002,1399.2751,1440.7032,1434.5072485220385,1396.6409142244947
-0.15071304445527167,-0.00014038798909499217,0.00028859869327565427,-0.15385311207676128,0.006280135242979237,2105480,9,14.31322,797.85266,1360.9425,1399.2771,1440.701,1436.2662459489702,1398.115691386593
-0.1444329092122924,-2.989964192434691e-05,0.000278104091020339,-0.147572976833782,0.006280135242979237,2031021,18,16.674221,725.32477,1357.7335,1413.0531,1443.0638,1431.3135089625084,1394.5523770149055
-0.1381527739693132,0.0,0.0002806160421304892,-0.1412928415908028,0.006280135242979237,2040863,17,16.103285,715.00055,1356.1246,1413.0526,1443.0667,1433.5987716245156,1399.6269488076903
...,...,...,...,...,...,...,...,...,...,...,...,...,...


In [34]:
# Log (but do not stop) when the number of local bins is not the expected one
if len(lc_local) != local_bin_width_factor:
    logger.info(f'{tess_id} lc_local incorrect dimension: {len(lc_local)}')

# Scale by the absolute value of the smallest (NaN-ignoring) flux, times 2, plus 1
local_min_flux_abs = np.abs(np.nanmin(lc_local.flux))
lc_local = (lc_local / local_min_flux_abs) * 2.0 + 1
lc_local

time,flux,flux_err,time_bin_start,time_bin_size,cadenceno,quality,sap_bkg_err,sap_bkg,sap_flux,sap_x,sap_y,centroid_row,centroid_col
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,d,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
TimeDelta,float64,float64,TimeDelta,float64,int64,int32,float32,float32,float32,float32,float32,float64,float64
-0.18839385591314708,1.014079745535556,0.038036680255800474,-0.1915339235346367,0.006280135242979237,2105034,12,15.115054,662.90546,1363.1324,1442.1395,1448.0624,1430.8702522237459,1393.6117419238399
-0.18211372067016784,1.0244171058503897,0.037641295263904063,-0.18525378829165745,0.006280135242979237,2066715,13,13.947191,797.16614,1366.3137,1454.1147,1450.1226,1431.0813371034417,1394.6331935907024
-0.1758335854271886,1.0128638440053723,0.037828739210429785,-0.1789736530486782,0.006280135242979237,2097547,9,15.818757,677.93744,1362.1885,1431.4188,1446.2148,1436.664079569808,1400.0838173319125
-0.16955345018420936,1.0363295814833753,0.03820572100891877,-0.17269351780569897,0.006280135242979237,2105469,11,12.752973,773.4305,1370.4257,1442.1367,1448.0579,1432.3543222115802,1393.380081391235
-0.16327331494123015,0.9768450788021603,0.03818462785795235,-0.16641338256271976,0.006280135242979237,2073796,2,15.885529,726.6768,1360.5597,1408.2018,1442.2261,1433.3581825198323,1399.9380152603358
-0.15699317969825088,0.9886872297481238,0.038008616257663795,-0.1601332473197405,0.006280135242979237,2097171,12,13.586003,796.4646,1361.1002,1399.2751,1440.7032,1434.5072485220385,1396.6409142244947
-0.15071304445527167,0.9814038599525776,0.03822849627132823,-0.15385311207676128,0.006280135242979237,2105480,9,14.31322,797.85266,1360.9425,1399.2771,1440.701,1436.2662459489702,1398.115691386593
-0.1444329092122924,0.9960394195245812,0.03683835531596641,-0.147572976833782,0.006280135242979237,2031021,18,16.674221,725.32477,1357.7335,1413.0531,1443.0638,1431.3135089625084,1394.5523770149055
-0.1381527739693132,1.0,0.0371710945690732,-0.1412928415908028,0.006280135242979237,2040863,17,16.103285,715.00055,1356.1246,1413.0526,1443.0667,1433.5987716245156,1399.6269488076903
...,...,...,...,...,...,...,...,...,...,...,...,...,...


In [35]:

print("Preprocessing centroid")
# preprocess_centroid() returns None when the 'sap_x'/'sap_y' columns are
# missing, in which case this tuple unpacking raises a TypeError.
local_cen, global_cen = preprocess_centroid(lc_local, lc_global)
local_cen

Preprocessing centroid


array([ 8.12177700e-01,  1.29161795e+00,  3.83845386e-01,  8.11927331e-01,
       -5.40069468e-01, -8.93715315e-01, -8.93725724e-01, -3.47238872e-01,
       -3.47158447e-01,  4.42424636e-01,  8.12345217e-01,  1.83234330e+00,
        1.21946424e+00,  3.03821453e-01, -3.47600093e-01, -1.88154932e+00,
       -7.00475424e-05, -1.21201560e+00, -1.31580158e-04,  8.12052644e-01,
        3.84381305e-01,  1.83231849e+00,  2.17334988e+00,  8.11960698e-01,
       -8.09127158e-01, -1.43161521e-01, -1.21191381e+00, -3.47364954e-01,
       -1.21205914e+00, -1.21211610e+00,  3.83906768e-01,  2.44659804e+00,
        1.29138450e+00,  3.84141490e-01, -8.08932526e-01,  8.11693731e-01,
       -1.88140831e+00, -8.93591758e-01, -1.88133768e+00,  0.00000000e+00,
        4.47831037e-05,  3.84364708e-01,  8.12483583e-01,  8.12361866e-01,
       -1.43089456e-01,  3.03919028e-01, -1.21193062e+00, -8.93570358e-01,
       -1.56654051e+00,  1.24789262e-05,  4.47831037e-05, -7.96085562e-07,
        1.29167591e+00,  

In [36]:
# Display the normalized global centroid array returned by preprocess_centroid()
global_cen

array([ 8.11156512e-01, -6.35790944e-01, -6.35767134e-01,  8.11102834e-01,
        2.84690091e+00, -1.63753653e+00, -6.35820228e-01,  8.10835309e-01,
       -4.62188040e-05, -1.63764592e+00,  1.93242439e-01,  8.10768149e-01,
       -1.63755421e+00,  1.93883285e-01,  1.94006442e-01, -1.21266765e+00,
       -3.71237369e-01,  8.10583502e-01, -1.21313461e+00,  8.11074572e-01,
        1.93504913e-01, -1.21314911e+00,  8.10790103e-01,  1.93489518e-01,
       -6.35758126e-01,  1.93550000e-01,  1.93543180e-01, -6.35696828e-01,
        1.93641709e-01,  8.11074716e-01,  8.11097680e-01,  8.11097175e-01,
        1.93535044e-01,  8.10905065e-01,  8.11127529e-01,  1.93642038e-01,
        8.11143319e-01,  1.93625766e-01, -1.21316184e+00,  1.93688441e-01,
        1.94042954e-01, -3.71027713e-01, -3.71078396e-01,  1.93581118e-01,
       -3.71028001e-01,  1.94057471e-01, -3.71055397e-01, -1.73919594e+00,
       -1.73924840e+00,  1.93580350e-01, -2.22065382e+00, -2.22066134e+00,
       -8.90162952e-01, -