### Run upon export from spreadsheet

In [None]:
import os

from astroquery.mast import Catalogs
import numpy as np
import pandas as pd


# Inputs: the TCE spreadsheet export and a CSV of extra per-star data keyed by tic_id.
tces_file = '/mnt/tess/labels/s33_cam1_sample.csv'
ext_data_file = '/mnt/tess/labels/ext_mast_data.csv'


tce_table = pd.read_csv(tces_file, header=0, low_memory=False)

# Add the output column names as copies of the spreadsheet columns.
# Keyword order fixes the order in which new columns are appended.
tce_table = tce_table.assign(
    tic_id=tce_table['star_tic'],
    Duration=tce_table['planet_tdur'],
    Period=tce_table['planet_period'],
    RA=tce_table['star_ra'],
    # Count of space-separated entries in the sector_id string.
    Sectors=tce_table['sector_id'].apply(
        lambda sectors: len(sectors.split(' '))
    ),
    Transit_Depth=tce_table['planet_depth'],
    Dec=tce_table['star_dec'],
    teff=tce_table['star_teff'],
    SN=tce_table['snr'],
    Qingress=0.0,
    Tmag=tce_table['star_tmag'],
    logg=tce_table['star_logg'],
    Epoc=tce_table['planet_epoch'],
)
tce_table = tce_table.set_index('tic_id').drop(columns=['Unnamed: 0'])

# Rescale Duration by 1/24 so it is comparable with Period below
# (presumably hours -> days; confirm against the spreadsheet units).
tce_table['Duration'] = tce_table['Duration'] / 24.0

# Drop some common invalid examples.
# Orbits falling inside the star
outside_star = ~tce_table.Ilabel
tce_table = tce_table[outside_star]
# Excessively large durations
plausible_duration = tce_table['Duration'] < 0.9 * tce_table['Period']
tce_table = tce_table[plausible_duration]

# Left-join the external data onto the TCEs by tic_id.
ext_table = pd.read_csv(ext_data_file, header=0, low_memory=False).set_index('tic_id')
joined_table = tce_table.join(ext_table, on='tic_id', how='left')

# Keep rows with no external objType at all, or whose objType is 'STAR'.
obj_type = joined_table['objType']
joined_table = joined_table[obj_type.isnull() | (obj_type == 'STAR')]

# Columns written to the output CSV, in this order.
output_columns = [
    'tic_id', 'RA', 'Dec', 'Tmag', 'Epoc', 'Period', 'Duration',
    'Transit_Depth', 'Sectors', 'star_rad', 'star_mass', 'teff',
    'logg', 'SN', 'Qingress',
]
joined_table = joined_table.reset_index()[output_columns]


# One zero-initialised disp_<letter> column per disposition code.
disps = ['E', 'J', 'N', 'S', 'B']

for disposition in disps:
    joined_table[f'disp_{disposition}'] = 0

joined_table = joined_table.set_index('tic_id')
print(f'Total entries: {len(joined_table)}')


joined_table.to_csv('/mnt/tess/astronet/tces-s33_cam1_sample.csv')

# python astronet/preprocess/generate_input_records.py --input_tce_csv_file=/mnt/tess/astronet/tces-s33_cam1_sample.csv --tess_data_dir=/mnt/tess/lc --output_dir=/mnt/tess/astronet/tfrecords-s33-cam1-sample --num_shards=1

In [None]:
# Lift the displayed-column limit, then preview five random rows.
pd.options.display.max_columns = None
joined_table.sample(n=5)

### Run once

In [None]:
def load_tces_old():
    """Load the old TCE table, reduced to the "latest" rows per tic_id.

    Reads ``/mnt/tess/astronet/tces.csv`` indexed by ``tic_id``. For each
    ``tic_id`` only the rows with the largest ``Sectors`` value are kept;
    among those, only the rows with the largest ``row_id`` are kept.

    Returns:
        The filtered DataFrame. It also carries the helper columns
        ``Sectors_max`` and ``row_id_max`` added by the two filtering passes.
    """
    old_tces = pd.read_csv('/mnt/tess/astronet/tces.csv', header=0)
    old_tces = old_tces.set_index('tic_id')

    # Pass 1 keeps the max sectors read; pass 2 then keeps the max row ID.
    for column in ('Sectors', 'row_id'):
        per_tic_max = old_tces.groupby('tic_id')[column].max()
        # The per-tic maximum arrives as "<column>_max" next to the original.
        old_tces = old_tces.join(
            per_tic_max, on='tic_id', how='right', rsuffix='_max'
        )
        is_max = old_tces[column] == old_tces[f'{column}_max']
        old_tces = old_tces[is_max]

    return old_tces

def generate_tce_bls_instar():
    """Combine the new, old and north TCE label tables into a single CSV.

    Steps:
      1. Read ``tce_bls_instar.csv`` (new) and ``tce_north_instar.csv``
         (north) from ``/mnt/tess/labels``, and get the old table from
         ``load_tces_old()``.
      2. Outer-join new with old on ``tic_id``; old columns that clash get
         an ``_old`` suffix.
      3. For a fixed list of columns, fill missing new values from the
         matching ``_old`` column, then drop every ``_old`` column.
      4. Append the north rows (they are not filled from the old table).
      5. Replace missing ``Ilabel`` values with ``False``.
      6. Write the result to ``/mnt/tess/labels/tce_bls_instar+old.csv``.

    Returns nothing; the output is the written CSV file.
    """
    tcenew = pd.read_csv('/mnt/tess/labels/tce_bls_instar.csv', header=0).set_index('tic_id')
    tceold = load_tces_old()
    tcenorth = pd.read_csv('/mnt/tess/labels/tce_north_instar.csv', header=0).set_index('tic_id')

    # Copy from old data where it's missing from the new.
    alltce = tcenew.join(tceold, how='outer', on='tic_id', rsuffix='_old')
    alltce = alltce.set_index('tic_id')

    alltce = alltce.drop(columns=['row_id'])

    def fillna(df, col_name):
        # Only rows where the new value is missing take the "_old" value.
        missing = df[col_name].isna()
        df.loc[missing, col_name] = df.loc[missing, col_name + '_old']

    columns_to_backfill = (
        'toi_id', 'Disposition', 'RA', 'Dec', 'Tmag', 'Epoc', 'Period',
        'Duration', 'Transit_Depth', 'Sectors', 'camera', 'ccd',
        'star_rad', 'star_mass', 'teff', 'logg', 'SN', 'Qingress',
    )
    for col_name in columns_to_backfill:
        fillna(alltce, col_name)

    alltce = alltce.drop(columns=[c for c in alltce.columns if c.endswith('_old')])

    # DataFrame.append was removed in pandas 2.0; pd.concat with default
    # arguments stacks the rows the same way (index labels are kept).
    alltce = pd.concat([alltce, tcenorth])

    alltce['Ilabel'] = alltce['Ilabel'].fillna(False)

    alltce.to_csv('/mnt/tess/labels/tce_bls_instar+old.csv')