### Commands

```
python astronet/preprocess/generate_input_records.py --input_tce_csv_file=/mnt/tess/astronet/tces-MMMM.csv --tess_data_dir=/mnt/tess/lc --output_dir=/mnt/tess/astronet/tfrecords-NN --num_worker_processes=3
```

In [4]:
import os

from astroquery.mast import Catalogs
import numpy as np
import pandas as pd


# Input locations: the merged TCE list, the external MAST catalog extract,
# and the label sheet consumed further down in this cell.
tces_file = '/mnt/tess/labels/tce_bls_instar+old.csv'
ext_data_file = '/mnt/tess/labels/ext_mast_data.csv'
labels_file = '/mnt/tess/labels/labels_v3.csv'


# Load the TCE table keyed by tic_id and drop the leftover unnamed column
# that is present in the CSV.
tce_table = pd.read_csv(tces_file, header=0).set_index('tic_id')
tce_table = tce_table.drop(columns=['Unnamed: 0'])
joined_table = tce_table

# Left-join the external catalog data so every TCE row is kept even when the
# catalog has no entry for its tic_id.
ext_table = pd.read_csv(ext_data_file, header=0).set_index('tic_id')
joined_table = joined_table.join(ext_table, on='tic_id', how='left')

# Keep rows whose objType is either missing or 'STAR'. The explicit copy makes
# the filtered frame independent of the join result, so the in-place column
# update below is not a write to a slice of another frame.
joined_table = joined_table[
    joined_table['objType'].isnull()
    | (joined_table['objType'] == 'STAR')
].copy()
# NOTE(review): dividing by 24 presumably converts Duration from hours to
# days -- confirm against the TCE source before relying on the unit.
joined_table['Duration'] /= 24

# Restore tic_id as a regular column and keep only the fields used downstream.
joined_table = joined_table.reset_index()[[
    'tic_id', 'RA', 'Dec', 'Tmag', 'Epoc', 'Period', 'Duration',
    'Transit_Depth', 'Sectors', 'star_rad', 'star_mass', 'teff',
    'logg', 'SN', 'Qingress'
]]


# Load the label sheet, then add one zero-initialised vote counter column
# (disp_<code>) for every disposition code.
labels_table = pd.read_csv(labels_file, header=0)
disps = ['E', 'J', 'N', 'S', 'B']
users = ['av', 'md', 'ch', 'as', 'mk']

labels_table = labels_table.assign(**{f'disp_{d}': 0 for d in disps})

def set_labels(row):
    """Fill the disp_* counters of one label row and return the row.

    When the row has a non-missing 'Decision', the matching disp_<Decision>
    entry is set to 1 and the per-user columns are ignored. Otherwise each
    column named in the module-level ``users`` list is inspected, and every
    value that is non-missing, truthy and not 'U' increments the matching
    disp_<value> entry by one.

    The row is modified in place and also returned, which is the shape
    ``DataFrame.apply(..., axis=1)`` expects.
    """
    missing = row.isna()

    if not missing['Decision']:
        # An explicit decision overrides any individual votes.
        row[f'disp_{row["Decision"]}'] = 1
        return row

    for user in users:
        if missing[user]:
            continue
        vote = row[user]
        # Skip empty values and the 'U' code; neither counts as a vote.
        if not vote or vote == 'U':
            continue
        row[f'disp_{vote}'] += 1
    return row

# Expose the sheet's 'TIC ID' column under the tic_id name used by the TCE
# table, then populate the disp_* counters row by row.
labels_table['tic_id'] = labels_table['TIC ID']
labels_table = labels_table.apply(set_labels, axis=1)

# Keep only the key and the vote counters, keyed by tic_id for the join.
disp_columns = [f'disp_{d}' for d in disps]
labels_table = labels_table[['tic_id'] + disp_columns].set_index('tic_id')


# Inner join: only TCEs that have a row in the label sheet survive.
joined_table = joined_table.set_index('tic_id').join(
    labels_table, on='tic_id', how='inner'
)
print(f'Total entries: {len(joined_table)}')

# Keep rows with at least one vote. skipna=False makes a missing counter
# propagate as NaN (and therefore fail the > 0 test), matching plain
# element-wise addition of the columns.
vote_totals = joined_table[disp_columns].sum(axis=1, skipna=False)
joined_table = joined_table[vote_totals > 0]
print(f'Total labeled entries: {len(joined_table)}')


joined_table.to_csv('/mnt/tess/astronet/tces-v3.csv')

Total entries: 14877
Total labeled entries: 7992


In [5]:
# Lift the column display limit so wide frames render in full, then preview
# five randomly chosen rows of the labeled table.
pd.options.display.max_columns = None
joined_table.sample(n=5)

Unnamed: 0_level_0,RA,Dec,Tmag,Epoc,Period,Duration,Transit_Depth,Sectors,star_rad,star_mass,teff,logg,SN,Qingress,disp_E,disp_J,disp_N,disp_S,disp_B
tic_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
273365683,338.650914,-73.539647,9.117,1325.824238,3.472809,0.218926,490.0,20.0,12.0808,,4783.7,,11.45604,0.21706,0,4,1,0,0
306736781,119.766897,-68.860619,10.6031,1332.996216,15.307288,0.146185,1370.0,20.0,9.99166,0.785874,4839.5,1.81,9.96674,0.24726,0,4,0,0,0
310103683,76.450021,-66.017993,10.0886,1626.939172,2.979059,0.023564,1770.0,20.0,2.18303,1.13,6067.0,3.81303,11.38573,0.12107,0,3,1,0,0
177282922,102.013088,-72.364418,11.298,1326.132975,2.394337,0.105375,150.0,20.0,11.4651,,4817.0,,9.22229,0.22499,0,2,2,0,0
299698550,270.458391,-65.995308,11.1821,1663.726338,9.241661,0.229655,1390.0,20.0,13.3578,,4672.0,,16.28596,0.22582,0,4,0,0,0


### Run once

In [None]:
def load_tces_old():
    """Load the legacy TCE table and keep only the preferred rows per tic_id.

    Reads '/mnt/tess/astronet/tces.csv' indexed by tic_id, then filters in
    two passes:

    1. keep rows whose 'Sectors' equals the maximum 'Sectors' for that tic_id;
    2. among those, keep rows whose 'row_id' equals the maximum 'row_id' for
       that tic_id.

    Returns:
        The filtered DataFrame. The helper columns 'Sectors_max' and
        'row_id_max' added by the joins are left in place.
    """
    legacy = pd.read_csv('/mnt/tess/astronet/tces.csv', header=0).set_index('tic_id')

    # Pass 1: attach the per-tic_id maximum of Sectors and keep matching rows.
    top_sectors = legacy.groupby('tic_id')['Sectors'].max()
    legacy = legacy.join(top_sectors, on='tic_id', how='right', rsuffix='_max')
    legacy = legacy[legacy['Sectors'] == legacy['Sectors_max']]

    # Pass 2: same approach with row_id to break remaining ties.
    top_row_id = legacy.groupby('tic_id')['row_id'].max()
    legacy = legacy.join(top_row_id, on='tic_id', how='right', rsuffix='_max')
    legacy = legacy[legacy['row_id'] == legacy['row_id_max']]

    return legacy

def generate_tce_bls_instar():
    """Merge the new TCE list with the legacy one and write the combined CSV.

    Reads '/mnt/tess/labels/tce_bls_instar.csv', outer-joins it with the
    output of ``load_tces_old()`` on tic_id, and for a fixed set of columns
    fills values missing in the new data from the corresponding '*_old'
    column. The '*_old' helper columns and 'row_id' are dropped, missing
    'Ilabel' values become False, and the result is written to
    '/mnt/tess/labels/tce_bls_instar+old.csv'.

    Returns:
        None. The function's only output is the CSV file.
    """
    tcenew = pd.read_csv('/mnt/tess/labels/tce_bls_instar.csv', header=0).set_index('tic_id')
    tceold = load_tces_old()

    # Copy from old data where it's missing from the new.
    alltce = tcenew.join(tceold, how='outer', on='tic_id', rsuffix='_old')
    # NOTE(review): joining with `on=` naming an index level appears to leave
    # tic_id either as a column or as the index depending on the pandas
    # version -- confirm. Only re-index when the column actually exists so the
    # call cannot fail with a KeyError.
    if 'tic_id' in alltce.columns:
        alltce = alltce.set_index('tic_id')

    alltce = alltce.drop(columns=['row_id'])

    # Columns whose missing values are back-filled from the '<name>_old'
    # counterpart produced by the join.
    backfilled_columns = (
        'toi_id', 'Disposition', 'RA', 'Dec', 'Tmag', 'Epoc', 'Period',
        'Duration', 'Transit_Depth', 'Sectors', 'camera', 'ccd',
        'star_rad', 'star_mass', 'teff', 'logg', 'SN', 'Qingress',
    )
    for col_name in backfilled_columns:
        # Compute the mask once and reuse it for both sides of the assignment.
        missing = alltce[col_name].isna()
        alltce.loc[missing, col_name] = alltce.loc[missing, col_name + '_old']

    alltce = alltce.drop(columns=[c for c in alltce.columns if c.endswith('_old')])

    alltce['Ilabel'] = alltce['Ilabel'].fillna(False)

    alltce.to_csv('/mnt/tess/labels/tce_bls_instar+old.csv')

### Old code

In [None]:
# import numpy as np
# import pandas as pd

# d1 = pd.read_csv('../temp/chdispositions_relabel.csv', header=0).set_index('tic')
# d2 = pd.read_csv('../temp/chdispositions_uniq.csv', header=0).set_index('tic')

# d3 = pd.concat([d1, d2])
# d3 = d3.drop(columns=['Unnamed: 3'])

# def amend(df, f):
#     d4 = pd.read_csv(f, names=['tic', 'nflag']).set_index('tic')
#     joined = df.join(d4, how='outer')
#     joined['flag'] = np.where(joined['nflag'].isna(), joined['flag'], joined['nflag'])
#     joined = joined[['user', 'flag']]
#     return joined

# joined = amend(d3, '../temp/CHrelabel_part1.ls')
# joined = amend(joined, '../temp/CHlabel_20200624.ls')
# joined = amend(joined, '../temp/CHlabel_20200625.ls')
# joined['user'] = 1

# # joined.to_csv('/mnt/tess/labels/chdispositions.csv')

In [None]:
# dliang = load_tces_old().reset_index()
# dliang = dliang[dliang.Disposition == 'PC']
# dliang = dliang.rename(columns={'tic_id': 'tic'})
# dliang['user'] = 2
# dliang['flag'] = 'E'

# dliang = dliang[['user', 'flag', 'tic']].set_index('tic')

# # dliang.to_csv('../yldispositions.csv')

In [None]:
# import os

# from astroquery.mast import Catalogs
# import numpy as np
# import pandas as pd


# include_liang_data = True


# if include_liang_data:
#     files = ['chdispositions.csv', 'avdispositions.csv', 'yldispositions.csv']
#     tces_file = '/mnt/tess/labels/tce_bls_instar+old.csv'

# else:
#     files = ['chdispositions.csv', 'avdispositions.csv']
#     tces_file = '/mnt/tess/labels/tce_bls_instar.csv'
    
# ext_data_file = '/mnt/tess/labels/ext_mast_data.csv'


# tce_table = pd.read_csv(tces_file, header=0).set_index('tic_id')
# tce_table = tce_table.drop(columns=['Unnamed: 0'])
# joined_table = tce_table

# ext_table = pd.read_csv(ext_data_file, header=0).set_index('tic_id')
# joined_table = joined_table.join(ext_table, on='tic_id', how='left')

# joined_table = joined_table[
#     joined_table["objType"].isnull()
#     | (joined_table["objType"] == 'STAR')
# ]


# joined_table['disp_E'] = 0
# joined_table['disp_J'] = 0
# joined_table['disp_N'] = 0
# joined_table['disp_S'] = 0
# joined_table['disp_B'] = 0
# joined_table['disp_I'] = 0

# for name in files:
#     with open(os.path.join('/mnt/tess/labels', name)) as f:
#         labels_table = pd.read_csv(f, header=0, usecols=[0, 1, 2])
#         labels_table = labels_table.rename(columns={'tic': 'tic_id'}).set_index('tic_id')
#         tag = name[:2]
#         label = 'flag_' + tag
#         labels_table = labels_table.rename(columns={'user': 'user_' + tag, 'flag': label})
#         joined_table = joined_table.join(labels_table, on='tic_id', how='left')
        
#         joined_table['disp_E'] += (joined_table[label] == 'E').map({True: 1, False: 0})
#         joined_table['disp_N'] += (joined_table[label] == 'N').map({True: 1, False: 0})
#         joined_table['disp_J'] += (joined_table[label] == 'J').map({True: 1, False: 0})
#         joined_table['disp_S'] += (joined_table[label] == 'S').map({True: 1, False: 0})
#         joined_table['disp_B'] += (joined_table[label] == 'B').map({True: 1, False: 0})

#         joined_table.loc[joined_table['Ilabel'], 'disp_E'] = 0
#         joined_table.loc[joined_table['Ilabel'], 'disp_N'] = 0
#         joined_table.loc[joined_table['Ilabel'], 'disp_J'] = 0
#         joined_table.loc[joined_table['Ilabel'], 'disp_S'] = 0
#         joined_table.loc[joined_table['Ilabel'], 'disp_B'] = 0
#         joined_table.loc[joined_table['Ilabel'], 'disp_I'] = 1

# fltr = (~joined_table["flag_ch"].isnull()
#         | ~joined_table["flag_av"].isnull())
    
# if include_liang_data:
#     fltr = fltr | ~joined_table["flag_yl"].isnull()
    
# joined_table = joined_table[fltr]

# if include_liang_data:
#     joined_table.to_csv('/mnt/tess/astronet/tces-new+old.csv')
# else:
#     joined_table.to_csv('/mnt/tess/astronet/tces-new.csv')