# PHT EB Candidates - Comparison with TESS EB

[TESS EB Catalog](http://tessebs.villanova.edu/) uses PHT #eclipsingbinary tagging as one of the input sources.

In [1]:
from importlib import reload

from IPython.display import display, HTML, clear_output

import numpy as np

import pandas as pd
from pandas.io.clipboard import clipboard_set

import matplotlib.pyplot as plt

import catalog_stats
import dashboard_utils
from dashboard_utils import CAT_COLS_COMMON

# Inject a CSS rule that sets elements of class `container` to 99% width,
# so the wide catalog tables displayed below have more horizontal room.
display(HTML("<style>.container { width:99% !important; }</style>"))

- To compare with the results from the published [TESS EB catalog](https://ui.adsabs.harvard.edu/abs/2022ApJS..258...16P/abstract), which covers sectors 1 - 26, we use a subset of the PHT EB Catalog that has comparable coverage.
- Furthermore, we only consider the subset that has decent accuracy (`eb_score >= 3`).

In [2]:
# Re-import the helper module so recent edits to it are picked up.
reload(dashboard_utils)

# PHT EB candidate catalog, joined with the TESS EB catalog.
pht_eb_catalog = dashboard_utils.get_catalog("pht_eb")
df_catplus = dashboard_utils.join_pht_eb_candidate_catalog_with(pht_eb_catalog, ["tesseb"])

# Keep only rows with max_sector <= 26 (the sector range covered by TESS EB).
# This also excludes those observed both in year 1 and year 3, to be on the conservative side.
within_tesseb_coverage = df_catplus["max_sector"] <= 26
df_catplus = df_catplus[within_tesseb_coverage]

print(f"Num of TICs: {len(df_catplus)}")

min_eb_score = 3
res, report, styler = catalog_stats.estimate_num_ebs_not_in_catalog(
    df_catplus,
    min_eb_score=min_eb_score,
)
display(report)

# "03+" is the eb_score_group label of the decent-accuracy rows in the report above.
has_decent_accuracy = df_catplus["eb_score_group"] == "03+"
df_subset = df_catplus[has_decent_accuracy]
num_decent = len(df_subset)
display(HTML(f"Num of TICs with decent accuracy: {num_decent}"))

Num of TICs: 7874


Unnamed: 0_level_0,count,count,count,count,count
Unnamed: 0_level_1,tic_id,tic_id,tic_id,tic_id,tic_id
is_eb_catalog,T,F,-,Totals,T/(T+F)
eb_score_group,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3
03+,1514,88,665,2267,0.945069
02-,1205,782,3620,5607,0.606442
Totals,2719,870,4285,7874,0.757593


### Those in PHT EB but not in TESS EB

- are they false positives in PHT EB, or could it be that TESS EB misses those targets?
  - TESS EB does miss a noticeable portion of them:
  - 246 of them have been classified as EB in SIMBAD, VSX or ASAS-SN
- If TESS EB misses those targets, are there some (systematic) reasons?

In [3]:
# Cross-tabulate the decent-accuracy subset: TESS EB membership (rows)
# against the is_eb_catalog flag (columns), counting TICs in each cell.
report = pd.pivot_table(
    df_subset,
    values=["tic_id"],
    index=["TESSEB_Is_In"],
    columns="is_eb_catalog",
    aggfunc=["count"],
    margins=True,
    margins_name="Totals",
)

report

Unnamed: 0_level_0,count,count,count,count
Unnamed: 0_level_1,tic_id,tic_id,tic_id,tic_id
is_eb_catalog,-,F,T,Totals
TESSEB_Is_In,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3
F,136,33,246,415
T,529,55,1268,1852
Totals,665,88,1514,2267


- 415 out of 2267 TICs are not in TESS EB. Out of these 415:
    - 246 of them are already classified as EB in SIMBAD, VSX or ASAS-SN.
    - 169 (136 + 33) of them remain uncertain, and require further triage

### Other considerations

TODO:
- TESS EB false positives due to NEB
- TESS EB eclipse duration accuracy (the TESS EB paper had some discussion)


### Appendix: Sample TICs

#### Sample TICs that TESS EB missed already classified as EB in SIMBAD, VSX or ASAS-SN

- they are likely real misses in TESS EB, given the existing classification
- possibly worth investigating whether some systematic issues lead to their being missed.

In [9]:
# TICs in the decent-accuracy subset that are not in TESS EB
# but are flagged as EB by the external catalogs (is_eb_catalog == "T").
not_in_tesseb = df_subset["TESSEB_Is_In"] != "T"
known_eb = df_subset["is_eb_catalog"] == "T"
df = df_subset[not_in_tesseb & known_eb]

sample_cols = ["best_subject_img_id"] + CAT_COLS_COMMON
start, end = 0, 15

# sample top TICs: highest eb_score first, ties broken by tic_id
df = df.sort_values(by=["eb_score", "tic_id"], ascending=[False, True])
top_sample = df[sample_cols][start:end]
display(dashboard_utils.style(top_sample, show_thumbnail=True))

# sample average-ish TICs (eb_score is 3): lowest eb_score first
df = df.sort_values(by=["eb_score", "tic_id"], ascending=[True, True])
average_sample = df[sample_cols][start:end]
display(dashboard_utils.style(average_sample, show_thumbnail=True))

best_subject_img_id,tic_id,best_subject_id,is_eb_catalog,eb_score,SIMBAD_MAIN_ID,SIMBAD_OTYPES,SIMBAD_Is_EB,VSX_OID,VSX_Type,VSX_Is_EB,VSX_Period,ASASSN_Name,ASASSN_URL,ASASSN_Type,ASASSN_Per,TESSEB
,418292123,48933995,T,9,,,-,130438.0,EA|EB,T,0.754175,J034322.45+763934.3,details,EA,0.754237,details
,359552286,48215552,T,8,,,-,1716679.0,EA,T,0.62537,,,,,details
,233060434,48227121,T,7,,,-,,,-,,J175855.03+621138.4,details,EA,0.825673,details
,298666271,37754074,T,7,V* HI Dra,RR*|**|V*|SB*,F,13882.0,EW,T,0.597419,J183324.00+584222.7,details,VAR,509.238255,details
,322606029,48940412,T,7,HD 336451,SB*,-,2213632.0,EA,T,5.250288,,,,,details
,459915346,48940325,T,7,BD+86 130,,-,1546276.0,EA,T,8.39725,,,,,details
,20215452,46386424,T,6,,,-,630374.0,MISC,-,0.595757,J150916.05+360200.2,details,EA,0.59575,details
,51960132,46413812,T,6,,,-,,,-,,J005539.61+595925.9,details,EB,0.844396,details
,52040219,46454230,T,6,V* V1061 Cas,EB*|V*,T,172603.0,EA,T,3.649365,J005613.94+650714.7,details,ROT:,0.0,details
,115288764,46416346,T,6,BD+44 2605,,-,629192.0,EA,T,1.188968,,,,,details


best_subject_img_id,tic_id,best_subject_id,is_eb_catalog,eb_score,SIMBAD_MAIN_ID,SIMBAD_OTYPES,SIMBAD_Is_EB,VSX_OID,VSX_Type,VSX_Is_EB,VSX_Period,ASASSN_Name,ASASSN_URL,ASASSN_Type,ASASSN_Per,TESSEB
,4735737,43266413,T,3,BD+29 2264,EB*|V*,T,44113,E:,T,,J121114.01+290627.0,details,ROT:,0.0,details
,9588485,48948966,T,3,* u Her,UV|EB*|SB*|**|V*,T,15899,EA/SD,T,2.051026,,,,,details
,22513851,48936710,T,3,TYC 2625-1707-1,SB*,-,2213625,EA,T,18.82429,,,,,details
,23936839,48951552,T,3,1SWASP J182416.12+351428.1,EB*|V*|EB?,T,229591,EW,T,0.275312,J182416.01+351428.3,details,EW,0.275305,details
,30631330,35169600,T,3,ASAS J050048-7029.8,EB*|V*,T,76832,EC,T,0.38735,J050047.90-702951.5,details,EW,0.387355,details
,31529171,35172184,T,3,UCAC4 110-010803,,-,633264,EW,T,0.461798,J055839.84-681145.8,details,EW,0.46179,details
,41694016,46380352,T,3,V* CV Boo,EB*|V*|SB*,T,4425,EA,T,0.846994,J152619.54+365853.5,details,EA,0.847,details
,43791458,48935929,T,3,CRTS J174150.8+170130,EB*,T,390494,EA,T,0.460312,J174150.84+170130.8,details,EA,0.460309,details
,47629196,48941818,T,3,TYC 3531-115-1,,-,229619,EA,T,2.872034,J183601.84+484235.7,details,EA,2.871716,details
,48446609,48937193,T,3,HS 1857+5144,CV*|blu|EB?,T,226887,R,F,0.266334,J185832.25+514857.5,details,R,0.266334,details


- no obvious pattern found yet, e.g., their VSX classifications (most of them have one) follow the general trend (EA being the most common, followed by EW)

In [16]:
# Rebuild the subset explicitly (not in TESS EB, but flagged as EB by the
# external catalogs) instead of relying on whatever the generic `df` was last
# left holding: a later cell reassigns `df` to a different subset, so the
# result would otherwise depend on cell execution order.
missed_by_tesseb = df_subset["TESSEB_Is_In"] != "T"
known_eb = df_subset["is_eb_catalog"] == "T"
df_missed_known_eb = df_subset[missed_by_tesseb & known_eb]

# Only rows that carry a VSX classification take part in the breakdown.
df_vsx = df_missed_known_eb[df_missed_known_eb["VSX_Type"].notna()]
display(HTML(
    "Top 5 VSX classifications for those not in TESS EB "
    f"({len(df_vsx)} out of {len(df_missed_known_eb)} TICs with VSX classification)"
))

# Count TICs per VSX type and show the five most frequent types.
df_vsx.groupby("VSX_Type").agg(
    num_tics=("tic_id", "count"),
).sort_values("num_tics", ascending=False).head(5)


Unnamed: 0_level_0,num_tics
VSX_Type,Unnamed: 1_level_1
EA,101
EW,44
EC,9
EB,9
EA/RS,7


#### Sample TICs that TESS EB missed, but not in SIMBAD, VSX or ASAS-SN

- they require more vetting, as they tend to have a mix of false positives and genuine EBs.

In [5]:
# TICs in the decent-accuracy subset that are not in TESS EB
# and are not flagged as EB by the external catalogs (is_eb_catalog != "T").
not_in_tesseb = df_subset["TESSEB_Is_In"] != "T"
not_known_eb = df_subset["is_eb_catalog"] != "T"
df = df_subset[not_in_tesseb & not_known_eb]

sample_cols = ["best_subject_img_id"] + CAT_COLS_COMMON
start, end = 0, 15

# sample top TICs: highest eb_score first, ties broken by tic_id
df = df.sort_values(by=["eb_score", "tic_id"], ascending=[False, True])
top_sample = df[sample_cols][start:end]
display(dashboard_utils.style(top_sample, show_thumbnail=True))

# sample average-ish TICs (eb_score is 3): lowest eb_score first
df = df.sort_values(by=["eb_score", "tic_id"], ascending=[True, True])
average_sample = df[sample_cols][start:end]
display(dashboard_utils.style(average_sample, show_thumbnail=True))


best_subject_img_id,tic_id,best_subject_id,is_eb_catalog,eb_score,SIMBAD_MAIN_ID,SIMBAD_OTYPES,SIMBAD_Is_EB,VSX_OID,VSX_Type,VSX_Is_EB,VSX_Period,ASASSN_Name,ASASSN_URL,ASASSN_Type,ASASSN_Per,TESSEB
,340633943,32017645,-,15,TYC 8560-2009-1,,-,,,-,,,,,,details
,29658499,33285171,-,9,CD-34 6375,,-,,,-,,,,,,details
,230024775,48944008,-,8,,,-,,,-,,,,,,details
,9779230,48225299,-,7,HD 157465,X,-,,,-,,,,,,details
,236761861,48230988,-,7,,,-,,,-,,,,,,details
,364116821,38702351,-,7,TYC 4455-633-1,,-,,,-,,,,,,details
,383598522,48224493,-,7,G 169-17,V*|SB*,-,,,-,,,,,,details
,76073981,48934888,-,6,TYC 3107-2249-1,V*|SB*,-,,,-,,,,,,details
,167717852,30859446,-,6,UCAC2 14975036,,-,,,-,,,,,,details
,207385593,44564164,-,6,LP 379-62,,-,,,-,,,,,,details


best_subject_img_id,tic_id,best_subject_id,is_eb_catalog,eb_score,SIMBAD_MAIN_ID,SIMBAD_OTYPES,SIMBAD_Is_EB,VSX_OID,VSX_Type,VSX_Is_EB,VSX_Period,ASASSN_Name,ASASSN_URL,ASASSN_Type,ASASSN_Per,TESSEB
,2013258,40154642,-,3,,,-,,,-,,,,,,details
,4918918,41023496,-,3,Wolf 327,,-,,,-,,,,,,details
,8153514,41037854,-,3,BD+37 2070,RG*,-,,,-,,,,,,details
,21818238,32590361,-,3,TYC 6615-509-1,,-,,,-,,,,,,details
,22877676,44572924,-,3,BD+38 2467,**,-,,,-,,,,,,details
,25537276,37867108,-,3,HD 221009,,-,,,-,,,,,,details
,46430860,31380708,-,3,UPM J0535-1706,V*,-,,,-,,,,,,details
,51961599,46383529,F,3,HD 5408,UV|**|SB*,-,1540323.0,HB,F,4.241,,,,,details
,52790360,48223562,-,3,TYC 4038-1167-1,,-,,,-,,,,,,details
,73632590,33297475,F,3,TYC 7196-2629-1,,-,,,-,,J102137.89-365503.3,details,ROT:,294.907302,details
