# Organize Files for TurboSETI 

In [1]:
import pandas as pd
import numpy as np
import os

In [2]:
# Load the TESS target table; its 'utc_observed' column is read further down to build targetTimes.
# NOTE(review): absolute, machine-specific path -- this cell only runs where that file exists.
targetinfo = pd.read_csv('/home/ubuntu/Documents/BL-TESSsearch/Target-Selection/TESStargets.csv')

In [3]:
# One data-file path per line, read from 'bls0-filelocs' in the current working directory
filepaths = np.loadtxt(os.path.join(os.getcwd(), 'bls0-filelocs'), dtype=str)

# Strip the directory part to get the bare file names
filenames = np.array([os.path.basename(path) for path in filepaths])

# Target name (TIC or HIP): second-to-last '_'-separated field of the text before the first '.'
targetnames = np.array([name.partition('.')[0].rsplit('_', 2)[-2] for name in filenames])

# No file has been run through turboSETI yet, so every flag starts as False
doneTurbo = np.array([False] * len(filepaths))

In [4]:
# Label each file by whether its name contains the substring 'spliced'
isSpliced = ['spliced' if 'spliced' in name else 'unspliced' for name in filenames]

In [5]:
# Date and time of observation
from astropy.time import Time

# Counting '_'-separated fields from the end of each file name:
# field -4 is the integer MJD day, field -3 the integer seconds into that day
mjd_int = np.array([int(fname.split('_')[-4]) for fname in filenames])
utc_sec = np.array([int(fname.split('_')[-3]) for fname in filenames])

# Fractional MJD: whole day plus the fraction of the 86400-second day
mjd = mjd_int + (utc_sec/86400)

times = Time(mjd, format='mjd', scale='utc')#.to_value('isot')

# Swap the space in each 'utc_observed' entry for 'T' before handing it to Time
isoStamps = [stamp.replace(' ', 'T') for stamp in targetinfo['utc_observed'].to_numpy()]
targetTimes = Time(isoStamps, scale='utc')

In [6]:
# ON or OFF

def getCadence(inFrame, time_key='TIMES', target_key='TARGETS', tolerance=6*60):
    '''
    Group observed targets into cadences using gaps in observation time.

    Rows are walked in the order given; a new cadence starts whenever a row's
    time is more than `tolerance` seconds later than the previous row's time.

    inFrame    : Pandas DataFrame with time and target columns, sorted by time
    time_key   : name of the time column
    target_key : name of the target column
    tolerance  : largest gap, in seconds, allowed between consecutive rows of
                 the same cadence
    returns    : list with one numpy array per cadence, each holding the unique
                 target names of that cadence in np.unique (sorted) order;
                 an empty list when inFrame has no rows

    Note: cadences observed back to back (gap <= tolerance) are not separated;
    they come out merged into a single group.
    '''

    from datetime import timedelta

    times = inFrame[time_key].to_numpy()
    targets = inFrame[target_key].to_numpy()

    # Nothing observed -> no cadences (avoids indexing targets[0] on empty input)
    if len(times) == 0:
        return []

    maxGap = timedelta(seconds=tolerance)

    # The last inner list is always the cadence currently being filled
    groups = [[targets[0]]]
    for ii in range(1, len(times)):
        if times[ii-1] + maxGap < times[ii]:
            # Gap exceeds the tolerance: this row opens a new cadence
            groups.append([targets[ii]])
        else:
            groups[-1].append(targets[ii])

    # Collapse repeated observations of the same target within each cadence
    return [np.unique(group) for group in groups]
    
# Pair every file's observation time with its target name, then order the rows
# chronologically because getCadence expects time-sorted input
inTable = pd.DataFrame({'TIMES' : times, 'TARGETS' : targetnames})
inTable = inTable.sort_values(by='TIMES')
cadence = getCadence(inTable, time_key='TIMES', target_key='TARGETS')

In [7]:
# Fix bugged lists of cadences by hand
# See bugged rows (any group that does not list exactly 4 unique targets)
for i, cc in enumerate(cadence):
    if len(cc) != 4:
        print(i, cc)

# Rows where two cadences were merged into one group.
# row index -> (cadence kept at that index, cadence appended to the end of the list)
manualSplits = {
    # Fix issue in row 32
    32: (np.array(['TIC376682699', 'HIP110885', 'HIP111325', 'HIP112946']),
         np.array(['TIC376637093', 'HIP112317', 'HIP111325', 'HIP110885'])),
    # Fix issue in row 6
    6: (np.array(['TIC178367144', 'HIP40087', 'HIP40426', 'HIP40858']),
        np.array(['TIC121338379', 'HIP42077', 'HIP41968', 'HIP41961'])),
}

for row, (kept, moved) in manualSplits.items():
    # Split only while the row still holds both TIC targets. This keeps the cell
    # idempotent (re-running it does not append duplicates) and refuses to
    # overwrite a row that is not the merged pair these fixes were written for.
    if kept[0] in cadence[row] and moved[0] in cadence[row]:
        cadence.append(moved)
        cadence[row] = kept

6 ['HIP40087' 'HIP40426' 'HIP40858' 'HIP41961' 'HIP41968' 'HIP42077'
 'TIC121338379' 'TIC178367144']
32 ['HIP110885' 'HIP111325' 'HIP112317' 'HIP112946' 'TIC376637093'
 'TIC376682699']


# Notes on Bugged Rows
After further inspection, I found that each of these two rows groups two cadences together because the two TIC targets in the row were observed within 6 minutes of each other, i.e. the two TOIs were observed back to back. In addition, row 32 lists only 4 OFF targets because its two TIC targets share two of the OFF targets.

In [8]:
# Create On Off and TOI lists

# Both arrays start out filled with the placeholder string '1.0'
onoff = np.ones(len(times)).astype(str)
TOI = np.ones(len(times)).astype(str)

for group in cadence:

    # The ON target of a cadence is its TIC entry; if several are present the
    # last one wins, and a cadence without any TIC entry leaves toi as None
    toi = None
    for member in group:
        if member.startswith('TIC'):
            toi = member

    for member in group:
        # Every file of this target, wherever it sits in the file list
        rows = targetnames == member
        onoff[rows] = 'ON' if member.startswith('TIC') else 'OFF'
        TOI[rows] = toi

In [9]:
# Create dataframe of all info

# Column label -> per-file values, in the order the columns should appear
infoColumns = ['TARGET NAME', 'OBS TIME', 'ON / OFF', 'TOI',
               'FILE NAME', 'FILE PATH', 'SPLICED?', 'TurboSETI?']
infoValues = [targetnames, times, onoff, TOI,
              filenames, filepaths, isSpliced, doneTurbo]
infodic = dict(zip(infoColumns, infoValues))

fileinfo = pd.DataFrame(infodic)

In [10]:
# Order the table chronologically (original row labels are kept as the index)
sortedfileinfo = fileinfo.sort_values(by=['OBS TIME'])

# Write the sorted table, index included, to the working directory
sortedfileinfo.to_csv('target-file-info.csv')

In [11]:
sortedfileinfo

Unnamed: 0,TARGET NAME,OBS TIME,ON / OFF,TOI,FILE NAME,FILE PATH,SPLICED?,TurboSETI?
6308,TIC344926234,58811.613541666666,ON,TIC344926234,spliced_blc0001020304050607_guppi_58811_53010_...,/mnt_bls8/datax3/collate/AGBT19B_999_114/splic...,spliced,False
6302,HIP49458,58811.61717592592,OFF,TIC344926234,spliced_blc0001020304050607_guppi_58811_53324_...,/mnt_bls8/datax3/collate/AGBT19B_999_114/splic...,spliced,False
6309,TIC344926234,58811.62081018519,ON,TIC344926234,spliced_blc0001020304050607_guppi_58811_53638_...,/mnt_bls8/datax3/collate/AGBT19B_999_114/splic...,spliced,False
6303,HIP49824,58811.624502314815,OFF,TIC344926234,spliced_blc0001020304050607_guppi_58811_53957_...,/mnt_bls8/datax3/collate/AGBT19B_999_114/splic...,spliced,False
6310,TIC344926234,58811.62819444444,ON,TIC344926234,spliced_blc0001020304050607_guppi_58811_54276_...,/mnt_bls8/datax3/collate/AGBT19B_999_114/splic...,spliced,False
...,...,...,...,...,...,...,...,...
1563,HIP31574,59365.918275462966,OFF,TIC438490744,blc45_guppi_59365_79339_HIP31574_0113.rawspec....,/mnt_blc45/datax/dibas/AGBT21A_996_41/GUPPI/BL...,unspliced,False
1619,HIP31574,59365.918275462966,OFF,TIC438490744,blc63_guppi_59365_79339_HIP31574_0113.rawspec....,/mnt_blc63/datax/dibas/AGBT21A_996_41/GUPPI/BL...,unspliced,False
1594,HIP31574,59365.918275462966,OFF,TIC438490744,blc75_guppi_59365_79339_HIP31574_0113.rawspec....,/mnt_blc75/datax/dibas/AGBT21A_996_41/GUPPI/BL...,unspliced,False
1575,HIP31574,59365.918275462966,OFF,TIC438490744,blc46_guppi_59365_79339_HIP31574_0113.rawspec....,/mnt_blc46/datax/dibas/AGBT21A_996_41/GUPPI/BL...,unspliced,False


In [12]:
# Count number of ON targets observed (distinct values in the TOI column)

sortedfileinfo['TOI'].nunique()

55

# Notes on Notebook Output
* The idea to check if the next observation is within a timedelta is from Gavin Groode
* This pandas dataframe is more visible at https://docs.google.com/spreadsheets/d/1Jny2OhsXjr23HhlN_e5bJ-WITwP2HSFZJIBWg3Dpr_Y/edit?usp=sharing
* In the above spreadsheet I had to fix TOI rows 3622-3645 and 3718-3741 because I do not handle double OFF targets well in this code (Note that I verified by visual inspection that this bug only occurs for these rows so I will not adjust the code)