In [1]:
import lightkurve as lk
import matplotlib.pyplot as plt
import math
from astropy.io import ascii
from astropy.io import fits
import numpy as np
from scipy.optimize import curve_fit
from glob import glob
import pandas as pd
import os

In [2]:
# Directory holding the per-filter AIJ measurement tables (<filter>.xls).
LC_DIR = '/Users/mschochet/Desktop/MSU_PHD/lco_aumic/lcs_posttwirl'


def _load_filter(filter_name, n_low=0, n_high=0):
    """Read one filter's AIJ table and trim flux outliers from both ends.

    The table is read from ``LC_DIR/<filter_name>.xls``. When any trimming is
    requested the table is sorted in place by 'rel_flux_T1', the ``n_low``
    lowest and ``n_high`` highest rows are set aside as bad, and the remaining
    rows are copied and sorted by 'J.D.-2400000'. With no trimming the table is
    left in file order and the cleaned table is simply a time-sorted copy.

    Args:
        filter_name (str): file stem of the table, e.g. 'gp', 'ip', 'rp', 'U', 'B', 'V'.
        n_low (int): number of lowest-'rel_flux_T1' rows to discard.
        n_high (int): number of highest-'rel_flux_T1' rows to discard.

    Returns:
        tuple: ``(table, low_labels, high_labels, cleaned)`` where ``table`` is
        the full table as read (flux-sorted if trimming was requested),
        ``low_labels`` / ``high_labels`` are the 'Label' values (as ``str``) of
        the discarded rows, and ``cleaned`` is the time-sorted table of kept rows.

    Raises:
        ValueError: if more rows are to be discarded than the table contains.
    """
    table = ascii.read(os.path.join(LC_DIR, f'{filter_name}.xls'))
    n_rows = len(table)
    if n_low + n_high > n_rows:
        raise ValueError(
            f"Cannot drop {n_low} + {n_high} rows from the {filter_name} table: "
            f"it only has {n_rows} rows"
        )

    if n_low or n_high:
        table.sort(keys='rel_flux_T1')

    keep_end = n_rows - n_high
    low_labels = [str(val) for val in table['Label'][:n_low]]
    high_labels = [str(val) for val in table['Label'][keep_end:]]

    # NOTE(review): per the astropy docs a row slice references the parent
    # table's data, so sorting the slice in place would reorder rows of `table`
    # as well. Copy first, matching how the B and V tables were already handled.
    cleaned = table[n_low:keep_end].copy()
    cleaned.sort(keys='J.D.-2400000')
    return table, low_labels, high_labels, cleaned


#gp
# ---
# Remove any frames where AIJ misplaced the aperture and T1 or C2 fluxes are negative/near-0
gp_data, gp_badfiles, _gp_high, gp_cleaned = _load_filter('gp', n_low=2)
gp_d, gp_aum_flux, gp_c1_flux = gp_cleaned['J.D.-2400000'], gp_cleaned['rel_flux_T1'], gp_cleaned['rel_flux_C2']
# ---

#ip
# ---
# Same as above but there is a +40 value out of nowhere, don't think that is real
ip_data, _ip_low, _ip_high, ip_cleaned = _load_filter('ip', n_low=28, n_high=1)
ip_badfiles = _ip_low + _ip_high
ip_d, ip_aum_flux, ip_c1_flux = ip_cleaned['J.D.-2400000'], ip_cleaned['rel_flux_T1'], ip_cleaned['rel_flux_C2']
# ---

#rp
# ---
# Same as above but there is only one bad value
rp_data, rp_badfiles, _rp_high, rp_cleaned = _load_filter('rp', n_low=1)
rp_d, rp_aum_flux, rp_c1_flux = rp_cleaned['J.D.-2400000'], rp_cleaned['rel_flux_T1'], rp_cleaned['rel_flux_C2']
# ---

#U
# ---
# Same as above but there are 13 values with substantially too large flux values, and 2 with values too low
U_data, _U_low, _U_high, U_cleaned = _load_filter('U', n_low=2, n_high=13)
U_badfiles = _U_high + _U_low  # high outliers first, then the two low ones (original ordering)
U_d, U_aum_flux, U_c1_flux = U_cleaned['J.D.-2400000'], U_cleaned['rel_flux_T1'], U_cleaned['rel_flux_C2']
# ---

#B
# ---
# No frames are discarded for B; the table is only time-sorted.
B_data, _B_low, _B_high, B_cleaned = _load_filter('B')
B_d, B_aum_flux, B_c1_flux = B_cleaned['J.D.-2400000'], B_cleaned['rel_flux_T1'], B_cleaned['rel_flux_C2']
# ---

#V
# ---
# No frames are discarded for V; the table is only time-sorted.
V_data, _V_low, _V_high, V_cleaned = _load_filter('V')
V_d, V_aum_flux, V_c1_flux = V_cleaned['J.D.-2400000'], V_cleaned['rel_flux_T1'], V_cleaned['rel_flux_C2']
# ---

## Find and save the names of the bad files in each filter (Only works with file drive plugged in)

In [5]:
# This accounts for files lost by twirl and frames with bad AIJ values; to also catch the frames that crashed AIJ, compare the files on disk against the labels in the AIJ table.

def find_lost(datalist, type, og_badfiles, data_root="/Volumes/harddrive", out_dir="bad_files"):
    """Write the names of all unusable frames for one filter to a text file.

    A frame is reported as bad if it was already flagged by the caller
    (``og_badfiles``) or if it is "lost": present on disk under ``data_root``
    but absent from the 'Label' column of ``datalist``. All names are compared
    by base name. The flagged names followed by the lost names are written, one
    per line and with every "aligned_" substring removed, to
    ``<out_dir>/bad_<type>files.txt``. The total count is printed.

    Args:
        datalist: table-like object with a 'Label' column (the notebook passes
            the tables returned by ``ascii.read``).
        type (str): filter name: 'gp', 'ip', 'rp', 'U', 'B' or 'V'. 'U' is looked
            up in ``<data_root>/U_notwirl/*.fits.fz``; every other filter in
            ``<data_root>/<type>/aligned/*.fits``. (The name shadows the builtin
            ``type``; it is kept so existing calls keep working.)
        og_badfiles (list): file names or paths already known to be bad.
        data_root (str): directory containing the per-filter image folders.
        out_dir (str): directory for the output list; created if missing.

    Raises:
        FileNotFoundError: if no image files are found on disk for this filter,
            e.g. because the data drive is not mounted.
    """
    flagged = [str(os.path.basename(val)) for val in og_badfiles]
    # Labels are reduced to base names so they are compared on the same footing
    # as the flagged names and the files found on disk.
    measured = {str(os.path.basename(str(val))) for val in datalist['Label']}

    if type == 'U':
        pattern = os.path.join(data_root, "U_notwirl", "*.fits.fz")
    else:
        pattern = os.path.join(data_root, type, "aligned", "*.fits")
    on_disk = [str(os.path.basename(path)) for path in sorted(glob(pattern))]
    if not on_disk:
        # Without this check every run on an unmounted drive would either fail
        # with an unrelated indexing error or silently report no lost files.
        raise FileNotFoundError(f"No files match {pattern!r}; is the data drive mounted?")

    # Plain membership test, in on-disk order. Labels that have no counterpart
    # on disk are simply ignored rather than breaking the comparison.
    lost = [name for name in on_disk if name not in measured]

    all_bad = flagged + lost
    print('The number of bad files in the', type, 'filter are:', len(all_bad))

    os.makedirs(out_dir, exist_ok=True)
    with open(os.path.join(out_dir, f'bad_{type}files.txt'), 'w') as f:
        for item in all_bad:
            f.write(item.replace("aligned_", "") + '\n')
    return

In [6]:
# Write the bad-file list for every filter that had frames flagged above,
# in the same order as before: ip, gp, rp, U.
_bad_file_jobs = (
    (ip_data, "ip", ip_badfiles),
    (gp_data, "gp", gp_badfiles),
    (rp_data, "rp", rp_badfiles),
    (U_data, "U", U_badfiles),
)
for _table, _band, _flagged in _bad_file_jobs:
    find_lost(_table, _band, _flagged)

The number of bad files in the ip filter are: 38
The number of bad files in the gp filter are: 2
The number of bad files in the rp filter are: 2
The number of bad files in the U filter are: 15
