### Run upon export from spreadsheet

In [1]:
import os

from astroquery.mast import Catalogs
import numpy as np
import pandas as pd


# Input CSV locations read by this notebook.
tces_file = '/mnt/tess/labels/tois.csv'
full_toi_file = '/mnt/tess/labels/full_toi_list.csv'
ext_data_file = '/mnt/tess/labels/ext_mast_data.csv'

# Physical constants in cgs, used to derive a stellar mass from log(g) and radius.
G_CGS = 6.67e-8       # gravitational constant [cm^3 g^-1 s^-2]
R_SUN_CM = 6.957e10   # solar radius [cm]
M_SUN_G = 1.989e33    # solar mass [g]

# Short column name -> spreadsheet column it is copied from.
TCE_COLUMN_SOURCES = {
    'tic_id': 'TIC',
    'RA': 'TIC Right Ascension',
    'Dec': 'TIC Declination',
    'Tmag': 'TMag Value',
    'Epoc': 'Epoch Value',
    'Period': 'Orbital Period Value',
    'Duration': 'Transit Duration Value',
    'Transit_Depth': 'Transit Depth Value',
    'star_rad': 'Star Radius Value',
    'teff': 'Effective Temperature Value',
    'logg': 'Surface Gravity Value',
    'SN': 'Signal-to-noise',
}

tce_table = pd.read_csv(tces_file, header=0, low_memory=False)
for short_name, spreadsheet_name in TCE_COLUMN_SOURCES.items():
    tce_table[short_name] = tce_table[spreadsheet_name]

# 'Sectors' arrives as a space-separated string; keep only how many entries it has.
tce_table['Sectors'] = tce_table['Sectors'].apply(lambda v: len(v.split(' ')))
tce_table['Qingress'] = 0.0

# Surface gravity g = G * M / R^2, hence M = g * R^2 / G.
# The radius has to be in cm for the cgs value of G to apply; the result is
# then converted from grams to solar masses.
# NOTE(review): assumes 'Star Radius Value' is given in solar radii and
# 'Surface Gravity Value' is log10(g) in cgs -- confirm against the export.
star_radius_cm = tce_table['star_rad'] * R_SUN_CM
tce_table['star_mass'] = (
    (10 ** tce_table['logg']) * (star_radius_cm ** 2) / (G_CGS * M_SUN_G)
)

# Presumably converts the transit duration from hours to days -- TODO confirm units.
tce_table['Duration'] /= 24.0

# Keep only the columns used downstream, keyed by TIC id.
tce_table = tce_table[[
    'tic_id', 'RA', 'Dec', 'Tmag', 'Epoc', 'Period', 'Duration',
    'Transit_Depth', 'Sectors', 'star_rad', 'star_mass', 'teff',
    'logg', 'SN', 'Qingress'
]].set_index('tic_id')


# Column layout of every per-source table before the tables are stacked.
OUTPUT_COLUMNS = [
    'tic_id', 'RA', 'Dec', 'Tmag', 'Epoc', 'Period', 'Duration',
    'Transit_Depth', 'Sectors', 'star_rad', 'star_mass', 'teff',
    'logg', 'SN', 'Qingress'
]

# Values written to the 'Source' column of the exported table.
SOURCE_BLS = 2
SOURCE_TEV = 3


def _select_detections(toi_table, suffix):
    """Return the per-source columns of ``toi_table`` (which is indexed by tic_id).

    ``suffix`` is ``'bls'`` or ``'tev'`` and picks the epoch/period/depth/
    duration/tmag columns carrying that suffix. The stellar mass and radius
    always come from the ``*_bls`` columns, for both sources.
    """
    return toi_table[[
        'star_mass_bls',
        'star_rad_bls',
        'epoch_' + suffix,
        'period_' + suffix,
        'depth_' + suffix,
        'duration_' + suffix,
        'tmag_' + suffix,
        'Signal-to-noise',
    ]]


def _overlay_detections(base_table, detections, suffix, source, duration_in_hours=False):
    """Join ``detections`` onto ``base_table`` and overwrite the TCE parameters.

    Args:
        base_table: TCE table indexed by ``tic_id``.
        detections: frame returned by ``_select_detections`` for ``suffix``.
        suffix: ``'bls'`` or ``'tev'``.
        source: value stored in the ``Source`` column of the result.
        duration_in_hours: when True the duration is divided by 24
            (presumably hours -> days; TODO confirm units).

    Returns:
        A frame with ``OUTPUT_COLUMNS`` plus ``Source``, on a fresh 0-based index.
    """
    # NOTE(review): the join key is tic_id only, so a TIC with several rows on
    # both sides produces every pairing of those rows -- confirm this is intended.
    merged = base_table.join(detections, on='tic_id', how='inner')
    # Rows without an epoch from this source carry no detection; drop them.
    merged = merged[merged['epoch_' + suffix].notna()].copy()

    merged['star_mass'] = merged['star_mass_bls']
    merged['star_rad'] = merged['star_rad_bls']
    merged['Epoc'] = merged['epoch_' + suffix]
    merged['Period'] = merged['period_' + suffix]
    merged['Transit_Depth'] = merged['depth_' + suffix]
    merged['Duration'] = merged['duration_' + suffix]
    merged['Tmag'] = merged['tmag_' + suffix]
    merged['SN'] = merged['Signal-to-noise']

    if duration_in_hours:
        merged['Duration'] /= 24.0

    merged = merged.reset_index()[OUTPUT_COLUMNS]
    merged['Source'] = source
    return merged


full_toi_table = pd.read_csv(full_toi_file, header=1, low_memory=False)
full_toi_table = full_toi_table.set_index('tic_id')

bls_table = _select_detections(full_toi_table, 'bls')
tev_table = _select_detections(full_toi_table, 'tev')

bls_joined = _overlay_detections(tce_table, bls_table, 'bls', SOURCE_BLS)
tev_joined = _overlay_detections(
    tce_table, tev_table, 'tev', SOURCE_TEV, duration_in_hours=True)

# Scratch name used by the previous version of this cell; kept bound to the
# same (TEV) frame in case other cells still reference it.
joined_table = tev_joined

# DataFrame.append was removed in pandas 2.0; pd.concat stacks the two tables
# the same way (each keeps its own 0-based index).
final_table = pd.concat([bls_joined, tev_joined])

final_table.to_csv('/mnt/tess/astronet/tces-toi-bls-vs-tev.csv')

```
python astronet/preprocess/generate_input_records.py --input_tce_csv_file=/mnt/tess/astronet/tces-toi-bls-vs-tev --tess_data_dir=/mnt/tess/lc --output_dir=/mnt/tess/astronet/tfrecords-toi-bls-vs-tev --num_shards=1
```

In [60]:
# Show every exported row for a single TIC id.
final_table.loc[final_table['tic_id'] == 161477033]

Unnamed: 0,tic_id,RA,Dec,Tmag,Epoc,Period,Duration,Transit_Depth,Sectors,star_rad,star_mass,teff,logg,SN,Qingress,Source
323,161477033,73.260995,-45.540671,10.061,2193.3292,40.8923,0.18,1050.0,4,0.879477,0.91,5465.0,4.57025,12.0,0.0,2
324,161477033,73.260995,-45.540671,10.061,2193.3292,40.8923,0.18,1050.0,2,0.879477,0.91,5465.0,4.57025,12.0,0.0,2
325,161477033,73.260995,-45.540671,10.061,2193.3292,40.8923,0.18,1050.0,1,0.879477,0.91,5465.0,4.57025,12.0,0.0,2
697,161477033,73.260995,-45.540671,10.018,1419.37,11.928,0.119167,730.16,4,,,5465.0,4.57025,8.55,0.0,3
698,161477033,73.260995,-45.540671,10.018,1443.222,14.058,0.14625,858.62,4,,,5465.0,4.57025,8.82,0.0,3
699,161477033,73.260995,-45.540671,10.018,2193.316,20.445,0.155833,920.0,4,0.879477,0.91,5465.0,4.57025,12.0,0.0,3
700,161477033,73.260995,-45.540671,10.018,1419.37,11.928,0.119167,730.16,2,,,5465.0,4.57025,8.55,0.0,3
701,161477033,73.260995,-45.540671,10.018,1443.222,14.058,0.14625,858.62,2,,,5465.0,4.57025,8.82,0.0,3
702,161477033,73.260995,-45.540671,10.018,2193.316,20.445,0.155833,920.0,2,0.879477,0.91,5465.0,4.57025,12.0,0.0,3
703,161477033,73.260995,-45.540671,10.018,1419.37,11.928,0.119167,730.16,1,,,5465.0,4.57025,8.55,0.0,3


In [53]:
# Lift the column limit so wide frames are displayed in full.
pd.options.display.max_columns = None
# Show five randomly chosen rows of the exported table.
final_table.sample(n=5)

Unnamed: 0,tic_id,RA,Dec,Tmag,Epoc,Period,Duration,Transit_Depth,Sectors,star_rad,star_mass,teff,logg,SN,Qingress,Source
67,52368076,23.594697,-66.67583,10.138,1334.457,9.165477,0.141667,923.85,3,,,5154.0,4.47072,7.75,0.0,3
135,207237016,49.816355,-59.40016,11.246,1365.0031,26.4129,0.1,6120.0,5,2.62866,1.13,6070.0,3.65,23.0,0.0,2
591,177722855,107.421556,-11.32895,10.522,1492.267,2.38933,0.20125,660.0,1,1.37031,1.026,5743.0,4.18,16.0,0.0,3
122,219388773,77.047581,-50.858766,10.679,1412.383,1.571527,0.06,720.0,1,0.817898,0.76,4752.8,4.37,7.35,0.0,2
1006,344085117,108.017394,-50.247688,11.973,1541.805,0.6597,0.027917,960.0,1,1.25112,1.05,5832.0,4.26,12.0,0.0,3


### Run once

In [54]:
def load_tces_old():
    """Load the legacy TCE table, reduced to its max-Sectors / max-row_id rows.

    Reads ``/mnt/tess/astronet/tces.csv`` and indexes it by ``tic_id``. Per
    ``tic_id`` it first keeps the rows whose ``Sectors`` equals that TIC's
    maximum, then, among those, the rows whose ``row_id`` equals the maximum
    remaining ``row_id``.

    Returns:
        The filtered frame. The helper columns ``Sectors_max`` and
        ``row_id_max`` introduced by the joins are still present.
    """
    legacy = pd.read_csv('/mnt/tess/astronet/tces.csv', header=0)
    legacy = legacy.set_index('tic_id')

    # Pass 1: attach each TIC's largest Sectors value and keep the matching rows.
    top_sectors = legacy.groupby('tic_id')['Sectors'].max()
    legacy = legacy.join(top_sectors, on='tic_id', how='right', rsuffix='_max')
    legacy = legacy[legacy['Sectors'] == legacy['Sectors_max']]

    # Pass 2: of the surviving rows, keep the one(s) with the largest row_id.
    top_row_id = legacy.groupby('tic_id')['row_id'].max()
    legacy = legacy.join(top_row_id, on='tic_id', how='right', rsuffix='_max')
    return legacy[legacy['row_id'] == legacy['row_id_max']]

def generate_tce_bls_instar():
    """Combine the new, legacy and north TCE tables and write them to one CSV.

    Steps:
      1. Outer-join ``tce_bls_instar.csv`` with the frame returned by
         ``load_tces_old()`` on ``tic_id``; overlapping legacy columns get an
         ``_old`` suffix.
      2. For a fixed list of columns, fill values missing in the new table
         from the corresponding ``_old`` column, then drop all ``_old`` columns.
      3. Append the rows of ``tce_north_instar.csv``.
      4. Replace missing ``Ilabel`` values with ``False``.
      5. Write the result to ``/mnt/tess/labels/tce_bls_instar+old.csv``.

    Returns:
        None. The only output is the CSV file.
    """
    tcenew = pd.read_csv('/mnt/tess/labels/tce_bls_instar.csv', header=0).set_index('tic_id')
    tceold = load_tces_old()
    tcenorth = pd.read_csv('/mnt/tess/labels/tce_north_instar.csv', header=0).set_index('tic_id')

    # Copy from old data where it's missing from the new.
    alltce = tcenew.join(tceold, how='outer', on='tic_id', rsuffix='_old')
    # NOTE(review): depending on the pandas version, joining on an index level
    # name appears to hand the key back either as a column or as the index --
    # confirm. Only re-index when the column is actually there, so the
    # function works in both cases.
    if 'tic_id' in alltce.columns:
        alltce = alltce.set_index('tic_id')
    elif alltce.index.name != 'tic_id':
        raise KeyError('tic_id')

    alltce = alltce.drop(columns=['row_id'])

    # Columns whose gaps in the new table are filled from the legacy table.
    backfill_columns = (
        'toi_id', 'Disposition', 'RA', 'Dec', 'Tmag', 'Epoc', 'Period',
        'Duration', 'Transit_Depth', 'Sectors', 'camera', 'ccd',
        'star_rad', 'star_mass', 'teff', 'logg', 'SN', 'Qingress',
    )
    for col_name in backfill_columns:
        missing = alltce[col_name].isna()
        alltce.loc[missing, col_name] = alltce.loc[missing, col_name + '_old']

    alltce = alltce.drop(columns=[c for c in alltce.columns if c.endswith('_old')])

    # DataFrame.append was removed in pandas 2.0; pd.concat is the equivalent.
    alltce = pd.concat([alltce, tcenorth])

    alltce['Ilabel'] = alltce['Ilabel'].fillna(False)

    alltce.to_csv('/mnt/tess/labels/tce_bls_instar+old.csv')