In [None]:
# This reads in my and NDL's phase analysis outputs and compares them

# Created 2022 May 28 by E.S.

In [46]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

%matplotlib qt

In [47]:
# Load the three input tables from ./data, in this order:
# NDL's spectrum epochs, NDL's phases, and then my own table
df_ndl_epochs, df_ndl_phases, df_mine = (
    pd.read_csv(csv_path)
    for csv_path in (
        "./data/spectra_epochs_lc.csv",
        "./data/phases_ndl.csv",
        "./data/spectra_my_bjds.csv",
    )
)

In [48]:
# Give the columns that will be compared unambiguous, owner-prefixed names

# my table: BJD with 2400000 subtracted (to compare with NDL), my phase,
# and the original spectrum file name
df_mine["my_spec_bjd"] = df_mine["bjd"] - 2400000
df_mine["my_phase"] = df_mine["phasemod"]
df_mine["spec_file"] = df_mine["file"]

# NDL's epoch table: the original spectrum file name (no change by NDL) and the BJD
df_ndl_epochs["spec_file"] = df_ndl_epochs["filenames"]
df_ndl_epochs["ndl_spec_bjd"] = df_ndl_epochs["bjd"]

# NDL's phase table: the phase
df_ndl_phases["ndl_phase"] = df_ndl_phases["Phases"]

In [49]:
# Build an integer "number_cur" key in both of NDL's tables
# (note that "filenames" here refers to NDL's files, not my spectra numbers)

# Remove the redundant rows whose file name matches ".c.fits".
# NOTE(review): the pattern is interpreted as a regular expression (the pandas
# default, now spelled out), so each "." matches ANY character rather than a
# literal dot -- confirm that is intended, or switch to regex=False.
# na=True flags rows with a missing file name as well, so they are dropped just
# as the previous `== False` comparison dropped them.
# .copy() makes the filtered table an independent frame, so the column
# assignment below does not trigger SettingWithCopyWarning (in pandas versions
# that emit it).
df_ndl_epochs = df_ndl_epochs.loc[
    ~df_ndl_epochs["filenames"].str.contains(".c.fits", regex=True, na=True)
].copy()
# the cur number is read from the "#" column and converted to ints
df_ndl_epochs["number_cur"] = df_ndl_epochs["#"].astype(int)

# In NDL's other table the cur number is the text after the last "_" in
# "#Name", also converted to ints
df_ndl_phases["number_cur"] = (
    df_ndl_phases["#Name"].str.split("_").str[-1].astype(int)
)

In [50]:
# The star name is taken to be the first six characters of the file name
# (epoch table) or of "#Name" (phase table)
df_ndl_epochs["star_name"] = df_ndl_epochs["filenames"].str.slice(stop=6)
df_ndl_phases["star_name"] = df_ndl_phases["#Name"].str.slice(stop=6)

In [51]:
# Join NDL's epoch table with NDL's phase table: rows pair up when both the
# star name and the cur number agree. Where the two tables share a column name,
# the epoch-table column keeps its name and the phase-table column gets "_y".
df_ndl_merged = pd.merge(
    df_ndl_epochs,
    df_ndl_phases,
    on=["star_name", "number_cur"],
    suffixes=(None, "_y"),
)

In [52]:
# Attach NDL's merged table to my results. Rows are paired on the original
# spectrum file name ("spec_file"); the inner join keeps only spectra that
# appear in both tables.
df_all_merged = pd.merge(df_mine, df_ndl_merged, on="spec_file", how="inner")

In [53]:
# NDL time baselines, for checking only:
# "Epoch_Max" minus NDL's spectrum BJD ...
df_all_merged["ndl_time_baselines"] = (
    df_all_merged["Epoch_Max"] - df_all_merged["ndl_spec_bjd"]
)
# ... and that baseline divided by the "period" column
df_all_merged["ndl_baseline_div_period"] = (
    df_all_merged["ndl_time_baselines"] / df_all_merged["period"]
)

In [54]:
# FYI only -- error in my phases: the error in period ("err_tot") multiplied by
# the number of cycles in the time baseline (|"baseline_div_period"|)
df_all_merged["error_my_phase"] = (
    df_all_merged["baseline_div_period"].abs() * df_all_merged["err_tot"]
)

In [55]:
# Disabled sanity check (histogram of "error_my_phase"): the plotting code is
# wrapped in a triple-quoted string, so nothing is drawn and the cell merely
# evaluates to that string. Remove the ''' delimiters to run it.
'''
# for checking
plt.clf()
plt.hist(df_all_merged["error_my_phase"], bins=100)
plt.show()
'''

'\n# for checking\nplt.clf()\nplt.hist(df_all_merged["error_my_phase"], bins=100)\nplt.show()\n'

In [56]:
# Disabled sanity check (my spectrum BJDs vs. NDL's, with a dotted gray 1:1
# line): the plotting code is wrapped in a triple-quoted string, so nothing is
# drawn and the cell merely evaluates to that string. Remove the '''
# delimiters to run it.
'''
# for checking
plt.scatter(df_all_merged["my_spec_bjd"],df_all_merged["ndl_spec_bjd"])
plt.plot([56200,56500],[56200,56500], linestyle=":", color="gray")
plt.show()
'''

'\n# for checking\nplt.scatter(df_all_merged["my_spec_bjd"],df_all_merged["ndl_spec_bjd"])\nplt.plot([56200,56500],[56200,56500], linestyle=":", color="gray")\nplt.show()\n'

In [57]:
# Disabled sanity check (scatter of the "Period" column against "T_final",
# with a dotted gray 1:1 line): the plotting code is wrapped in a triple-quoted
# string, so nothing is drawn and the cell merely evaluates to that string.
# Remove the ''' delimiters to run it.
'''
# for checking
plt.scatter(df_all_merged["Period"],df_all_merged["T_final"])
plt.plot([0,1],[0,1], linestyle=":", color="gray")
plt.show()
'''

'\n# for checking\nplt.scatter(df_all_merged["Period"],df_all_merged["T_final"])\nplt.plot([0,1],[0,1], linestyle=":", color="gray")\nplt.show()\n'

In [59]:
# for comparing NDL and my phases, and troubleshooting disagreement
# (disabled: the plotting code below is wrapped in a triple-quoted string, so
# nothing is drawn; remove the ''' delimiters to run it. When enabled it
# scatters my phase against NDL's phase, labels every point with its spectrum
# file name and with NDL's baseline/period rounded to 2 decimals, and draws a
# dotted gray 1:1 line.)
'''
plt.clf()
plt.scatter(df_all_merged["my_phase"],df_all_merged["ndl_phase"])

for i in range(0,len(df_all_merged)):

    plt.annotate(df_all_merged["spec_file"].loc[i],
                 xy=(df_all_merged["my_phase"].loc[i],df_all_merged["ndl_phase"].loc[i]))

    plt.annotate(np.round(df_all_merged["ndl_baseline_div_period"].loc[i],2),
                 xy=(df_all_merged["my_phase"].loc[i],df_all_merged["ndl_phase"].loc[i]))

#plt.scatter(np.subtract(1.,df_all_merged["my_phase"]),df_all_merged["ndl_phase"])
plt.plot([0,1],[0,1], linestyle=":", color="gray")
plt.show()
'''

In [60]:
# Write the fully merged comparison table (row index included) to a scratch CSV
df_all_merged.to_csv(path_or_buf="./data/junk.csv")