In [1]:
import os, sys

%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import astropy.io.ascii as at

In [2]:

def read_validation_results(cluster, date, which=None):
    """Load one visual-inspection results table and keep the inspected rows.

    Reads ``tables/{cluster}_{date}_results_comments{which}.csv`` (no suffix
    when ``which`` is None), prints the total and retained row counts, keeps
    only the rows whose "Select" entry is not masked, and renames the
    target-name column to "TIC".

    Parameters
    ----------
    cluster : str
        Cluster name as it appears in the file name, e.g. "IC_2391".
    date : str
        Date stamp as it appears in the file name, e.g. "2021-06-22".
    which : optional
        Suffix inserted directly before ".csv" to pick an alternate
        validation file (e.g. ``2``). ``None`` selects the un-suffixed file.

    Returns
    -------
    astropy.table.Table
        The rows with an unmasked "Select" value, with the target-name
        column renamed to "TIC".

    Raises
    ------
    KeyError
        If the table has neither a "\\ufefftarget_name" nor a "target_name"
        column to rename.
    """
    # Read in my visual inspection results
    suffix = "" if which is None else which
    vis_file = f"tables/{cluster}_{date}_results_comments{suffix}.csv"
    vis = at.read(vis_file, delimiter=",")

    # A column with no empty cells is read as a plain (unmasked) Column, which
    # has no ``.mask`` attribute; getmaskarray handles both cases and returns
    # an all-False mask for unmasked input.
    select_mask = np.ma.getmaskarray(vis["Select"])
    good = np.flatnonzero(~select_mask)
    print(len(vis),"total",len(good),"good")


#     # Read in all the peaks, for ID'ing third periods
#     peaks = at.read(f"tables/{cluster}_{date}_allpeaks.csv",delimiter=",")

    # Limit the table to only the light curves I analyzed
    vis = vis[good]

    # The header picks up a UTF-8 byte-order mark when the CSV was saved with
    # one; accept the name with or without the BOM prefix.
    for name_col in ("\ufefftarget_name", "target_name"):
        if name_col in vis.colnames:
            vis.rename_column(name_col, "TIC")
            break
    else:
        raise KeyError(
            f"No target_name column found in {vis_file}; "
            f"columns are {vis.colnames}"
        )

# NOTE(review): the disabled code below has latent bugs to fix before it is
# re-enabled:
#   * the lines of the form `vis["final_Q"][...]==vis["Q2"][...]` (and the
#     "second_Q"/"Q3" equivalents) are comparisons, not assignments, so they
#     have no effect;
#   * `peaks["period"][-3]` / `peaks["power"][-3]` index the whole peaks
#     table rather than the matched subset `ploc`.

#     # Select the final period based on quality flags
#     vis["final_period"] = np.copy(vis["sig_periods"])
#     vis["final_power"] = np.copy(vis["sig_powers"])
#     vis["final_Q"] = np.copy(vis["Q"])
#     # These have to be -9999 because comparison with NaNs gets messed up
#     vis["second_period"] = np.ones_like(vis["final_period"])*-9999
#     vis["second_power"] = np.ones_like(vis["final_power"])*-9999
#     vis["second_Q"] = np.ones_like(vis["final_Q"])*4

#     # If I flagged the highest peak as bad, but selected another peak,
#     # Select that one instead
#     replace2 = (vis["Q"]==2) & ((vis["Q2"]==1) | (vis["Q2"]==0))
#     replace3 = (vis["Q"]==2) & ((vis["Q3"]==1) | (vis["Q3"]==0))
#     vis["final_period"][replace2] = vis["sec_periods"][replace2]
#     vis["final_Q"][replace2]==vis["Q2"][replace2]
#     vis["final_power"][replace2] = vis["sec_powers"][replace2]

#     ploc0 = np.ones(len(peaks),bool)
#     for i in np.where(replace3)[0]:
#         ploc0[:] = True
#         for col in ["TIC","provenance_name","sequence_number","flux_cols"]:
#             ploc0 = ploc0 & (vis[col][i]==peaks[col])

#         ploc = np.where(ploc0)[0]
#         if len(ploc)>2:
#             # The peaks are sorted in ascending order when I created the peaks
#             # file, so I just need to go backwards within the matching set
#             vis["final_period"][i] = peaks["period"][-3]
#             vis["final_Q"][i]==vis["Q3"][i]
#             vis["final_power"][i] = peaks["power"][-3]
#         else:
#             print(f"Insufficient peaks for {vis['TIC'][i]}!")
#             print(vis["TIC","provenance_name","sequence_number","flux_cols",
#                       "Q","Q2","Q3","Notes"][i])
#             print(peaks[ploc])


#     # The MultiProt column didn't necessarily mean that two good periods
#     # were detected, so use the Q values to find multiperiodic stars
#     good1 = (vis["Q"]==0) | (vis["Q"]==1)
#     good2 = (vis["Q2"]==0) | (vis["Q2"]==1)
#     good3 = (vis["Q3"]==0) | (vis["Q3"]==1)
#     multi_q = (good1 & good2) | (good1 & good3) | (good2 & good3)

#     q2 = ["Q2","Q3","Q3"]

#     for gi in np.where((good1 & good2))[0]:
#         vis["second_period"][gi] = vis["sec_periods"][gi]
#         vis["second_power"][gi] = vis["sec_powers"][gi]
#         vis["second_Q"][gi] = vis["Q2"][gi]

#     for i in np.where((good1 & good3) | (good2 & good3))[0]:
#         ploc0[:] = True
#         for col in ["TIC","provenance_name","sequence_number","flux_cols"]:
#             ploc0 = ploc0 & (vis[col][i]==peaks[col])

#         ploc = np.where(ploc0)[0]
#         if len(ploc)>2:
#             # The peaks are sorted in ascending order when I created the peaks
#             # file, so I just need to go backwards within the matching set
#             vis["second_period"][i] = peaks["period"][-3]
#             vis["second_Q"][i]==vis["Q3"][i]
#             vis["second_power"][i] = peaks["power"][-3]
#         else:
#             print(f"Insufficient peaks for {vis['TIC'][i]}!")
#             print(vis["TIC","provenance_name","sequence_number","flux_cols",
#                       "Q","Q2","Q3","Notes"][i])
#             print(peaks[ploc])

    # Return the output table
    return vis


In [6]:
# Clusters and the date stamp of each cluster's validation tables; the two
# lists are paired by position.
clusters = ["IC_2391","Collinder_135","NGC_2451A","NGC_2547","IC_2602"]
dates = ["2021-06-22","2021-06-18","2021-06-21","2021-06-21","2021-07-02"]

# For every cluster, load both validation passes and report (a) how many
# distinct TIC IDs each holds, (b) any TIC ID that occurs more than once,
# and (c) any TIC ID present in one pass but missing from the other.
for i, (cluster, date) in enumerate(zip(clusters, dates)):
    print("\n\n", cluster)

    # First validation catalog (un-suffixed file)
    cat1 = read_validation_results(cluster, date)
    u1, inv1, ct1 = np.unique(
        cat1["TIC"], return_inverse=True, return_counts=True
    )
    n1 = len(u1)
    print(n1, "Unique TIC IDs, validation 1")
    print("Duplicate\n", u1[ct1 > 1])

    # Second validation catalog (file suffixed with "2")
    cat2 = read_validation_results(cluster, date, which=2)
    u2, inv2, ct2 = np.unique(
        cat2["TIC"], return_inverse=True, return_counts=True
    )
    n2 = len(u2)
    print(n2, "Unique TIC IDs, validation 2")
    print("Duplicate\n", u2[ct2 > 1])

    # u1 and u2 come straight from np.unique, so assume_unique is safe here
    only_in_2 = np.isin(u2, u1, invert=True, assume_unique=True)
    only_in_1 = np.isin(u1, u2, invert=True, assume_unique=True)
    print("\n2 not in 1\n", u2[only_in_2])
    print("\n1 not in 2\n", u1[only_in_1])



 IC_2391
1505 total 224 good
224 Unique TIC IDs, validation 1
Duplicate
 TIC
---
1505 total 224 good
224 Unique TIC IDs, validation 2
Duplicate
 TIC
---

2 not in 1
 TIC
---

1 not in 2
 TIC
---


 Collinder_135
2833 total 367 good
367 Unique TIC IDs, validation 1
Duplicate
 TIC
---
2833 total 367 good
367 Unique TIC IDs, validation 2
Duplicate
 TIC
---

2 not in 1
 TIC
---

1 not in 2
 TIC
---


 NGC_2451A
2035 total 317 good
317 Unique TIC IDs, validation 1
Duplicate
 TIC
---
2035 total 317 good
317 Unique TIC IDs, validation 2
Duplicate
 TIC
---

2 not in 1
 TIC
---

1 not in 2
 TIC
---


 NGC_2547
1630 total 199 good
199 Unique TIC IDs, validation 1
Duplicate
 TIC
---
1630 total 199 good
199 Unique TIC IDs, validation 2
Duplicate
 TIC
---

2 not in 1
 TIC
---

1 not in 2
 TIC
---


 IC_2602
5130 total 396 good
396 Unique TIC IDs, validation 1
Duplicate
 TIC
---
5130 total 396 good
396 Unique TIC IDs, validation 2
Duplicate
 TIC
---

2 not in 1
 TIC
---

1 not in 2
 TIC
---
