In [None]:
import time
import warnings
import numpy as np
import scipy.stats as stats
import matplotlib.pyplot as plt
from scipy.spatial import KDTree
import pandas as pd
import astropy.units as u
from tqdm import tqdm
from astropy.coordinates import SkyCoord
from astropy.table import vstack, Table
from astroquery.simbad import Simbad
from scipy.spatial import KDTree
from scipy.stats import binned_statistic

from astroquery.simbad.core import BlankResponseWarning
from astropy.coordinates import SkyCoord
import scienceplots

from exod.post_processing.crossmatch import crossmatch_unique_regions, crossmatch_dr14_slim
#from exod.post_processing.cluster_regions import get_unique_regions, ra_dec_to_xyz
from exod.post_processing.main import calc_df_lc_feat_filter_flags
from exod.post_processing.results_manager import ResultsManager
from exod.utils.plotting import set_latex_font
from exod.post_processing.crossmatch import crossmatch_unique_regions
from exod.xmm.bad_obs import obsids_to_exclude
from exod.post_processing.util import get_lc
from exod.utils.path import savepaths_combined
# Silence astroquery's SIMBAD BlankResponseWarning; the filter is process-wide,
# so it applies to every cell that runs after this one.
warnings.filterwarnings("ignore", category=BlankResponseWarning)

# Let pandas render up to 100 columns and 100 rows before truncating a repr.
for _display_option in ('display.max_columns', 'display.max_rows'):
    pd.set_option(_display_option, 100)

In [None]:
# Load the combined EXOD result tables that the rest of the notebook works on.
rm = ResultsManager()

# Tables stored as CSV; the paths come from exod's `savepaths_combined` mapping.
df_evt     = pd.read_csv(savepaths_combined['evt_info'])
df_dc      = pd.read_csv(savepaths_combined['dc_info'])
df_lc_idx  = pd.read_csv(savepaths_combined['lc_idx'], index_col='Unnamed: 0')
df_regions = pd.read_csv(savepaths_combined['regions'])

# EXOD catalogue tables (full and unique-source versions).
tab_exod_cat = Table.read('../data/results_combined/exod_catalogue/EXOD_DR1_cat.fits')
tab_exod_cat_unique = Table.read('../data/results_combined/exod_catalogue/EXOD_DR1_cat_unique.fits')

# Light-curve features and unique regions are taken from the ResultsManager.
# (A CSV read of savepaths_combined['lc_features'] into `df_lc_feat` used to
# precede this; it was overwritten here before being used, so it was dropped.)
df_lc_feat        = rm.df_lc_features
df_regions_unique = rm.df_regions_unique

# Crossmatch of the unique regions; only the 'XMM DR14' result is used below.
dfs_cmatch    = crossmatch_unique_regions(df_regions_unique.reset_index(), clobber=False)
df_cmatch_xmm = dfs_cmatch['XMM DR14']

# Column assignment aligns on the index.
# NOTE(review): assumes df_cmatch_xmm / df_regions share df_lc_feat's row index — confirm.
df_lc_feat['SEP_ARCSEC'] = df_cmatch_xmm['SEP_ARCSEC']

# Copy the filter flags and obsid onto the regions table.
for _col in ('filt_exclude_obsid', 'filt_tbin_5_n_l_5', 'filt_5sig', 'obsid'):
    df_regions[_col] = df_lc_feat[_col]

In [None]:
# Run ids flagged by eye as worth a closer look.
# Each entry needs its own trailing comma: without it Python silently joins
# adjacent string literals into a single (non-existent) run id.
interesting = [
    '0810200501_0_50_2.0_12.0',  # Hard source, recurring peaks?
    '0820860101_0_50_0.2_2.0',
]

# Fast-flare candidates as (runid, label) keys; the order sets the stacking
# order of the light curves in the overview plot.
interesting_fast_flares = [
    ('0725300153_0_5_0.2_2.0', '0'),   # Multiple bin Flare
    ('0861080101_0_5_0.2_2.0', '0'),
    ('0748591131_0_5_0.2_2.0', '0'),
    ('0748591134_0_5_0.2_2.0', '0'),
    ('0748591136_0_5_0.2_2.0', '0'),
    ('0800730701_0_5_0.2_2.0', '1'),   # A star switching on in X-rays!? HD 3447 (D=164.1438pc GAIA DR3)
    ('0500940101_0_5_0.2_2.0', '1'),   # Multiple bin Flare
    ('0761070101_0_5_0.2_2.0', '0'),   # Multiple bin Flare
    ('0765080801_0_5_0.2_2.0', '0'),   # Multiple bin Flare
    ('0823593901_0_5_0.2_2.0', '1'),   # Multiple bin Flare
    ('0413780301_0_5_0.2_2.0', '0'),   # Multiple bin Flare
    ('0600320101_0_5_0.2_2.0', '4'),   # Multiple bin Flare
    ('0803990401_1_5_0.2_2.0', '0'),
    ('0600320101_0_5_0.2_2.0', '2'),
    ('0744413901_0_5_0.2_2.0', '2'),
    ('0883770701_0_5_0.2_2.0', '3'),
    ('0784330201_0_5_0.2_2.0', '0'),
    ('0148960901_0_5_0.2_2.0', '0'),
    ('0652660301_0_5_0.2_2.0', '0'),
    ('0881210101_0_5_0.2_2.0', '3'),
    ('0860303301_0_5_0.2_2.0', '2'),
    ('0550970101_0_5_0.2_2.0', '4'),
    ('0202130101_0_5_0.2_2.0', '2'),
    ('0886061201_0_5_0.2_2.0', '0'),
    ('0103060201_0_5_0.2_2.0', '0'),
    ('0511580401_0_5_0.2_2.0', '1'),
    ('0501790101_0_5_0.2_2.0', '3'),
    ('0109460601_0_5_0.2_2.0', '0'),
    ('0800190101_0_5_0.2_2.0', '1'),
    ('0932391001_0_5_0.2_2.0', '0'),
    ('0111120101_0_5_0.2_2.0', '1'),
    ('0502500101_0_5_0.2_2.0', '1'),
    ('0881910101_0_5_0.2_2.0', '18'),
    ('0691550101_0_5_0.2_2.0', '0'),
]

# Other interesting sources, stored as (runid, label) keys.
keys_int = [
    ('0671930101_0_5_2.0_12.0', '0'),  # HD 200560 Binary Star
]

# Candidate keys currently left out of the list:
#   ('0810200501_0_50_2.0_12.0', '0')
#   ('0820860101_0_50_0.2_2.0', '0')

In [None]:
# Stack every fast-flare light curve in one tall panel. Curve number k is
# raised by 10*k counts and aligned so that its peak bin sits at x = 50 - 3*k,
# which staggers the flares diagonally across the figure.
fig = plt.figure(figsize=(3.5, 10))
for curve_no, key in enumerate(interesting_fast_flares):
    x_shift  = 3 * curve_no    # extra horizontal shift applied to this curve
    y_offset = 10 * curve_no   # vertical offset separating the curves

    df_lc = get_lc(key, df_lc_idx)
    max_idx = np.argmax(df_lc['n']) + x_shift
    x = 50 + np.arange(len(df_lc)) - max_idx

    # The obsid label (first 10 characters of the runid) moves to the right-hand
    # side once the shift exceeds 48.
    label_x = 20 if x_shift > 48 else -55
    plt.text(label_x, y_offset + 0.45, s=f'{key[0][0:10]}')
    plt.step(x, df_lc['n'] + y_offset, lw=1.0)

plt.xlim(-60, 60)
plt.ylim(0, 360)
plt.yticks([])

In [None]:
# Show the light-curve feature table as this cell's output.
df_lc_feat

In [None]:
from exod.processing.bayesian_computations import B_peak_log

In [None]:
# Two-panel figure: observed vs expected counts (top) and B_peak_log per bin
# (bottom), saved as PNG and PDF.
# NOTE(review): `df_lc` is not assigned in this cell, so it is whatever light
# curve an earlier cell last loaded — confirm that is the intended source.
n_obs     = df_lc['n'].values
mu_exp    = df_lc['mu'].values
bin_index = range(len(df_lc))

# Evaluate the per-bin statistic once; the same curve used to be computed and
# drawn twice on the lower axis.
b_peak = [B_peak_log(n=n_i, mu=mu_i) for n_i, mu_i in zip(n_obs, mu_exp)]

# A uniform jitter in [0, 0.5) is added to mu before plotting. It is drawn from
# a seeded generator so that the saved figure is reproducible.
# NOTE(review): the jitter is presumably cosmetic — confirm it belongs in the final plot.
mu_jitter = 0.5 * np.random.default_rng(0).random(size=len(df_lc))

fig, ax = plt.subplots(2, 1, height_ratios=(2, 1), figsize=(8, 5), sharex=True)
ax[0].step(bin_index, n_obs, color='black', label='Observed Counts (N)')
ax[0].step(bin_index, mu_exp + mu_jitter, color='red', label=r'Expected Counts ($\mu$)')
ax[0].set_ylabel('Counts')

ax[1].step(bin_index, b_peak, color='green', label='Significance')
ax[1].set_ylabel('Bayes Factor')
ax[1].set_xlabel('Time')
# Dashed threshold line, annotated as 5 sigma just above it.
ax[1].axhline(5.94, lw=1.0, ls='--', color='black')
ax[1].text(x=760, y=6.1, s=r'$5 \sigma$', fontsize=20)

for a in ax:
    a.set_xlim(750, 1000)
    a.legend()

plt.subplots_adjust(hspace=0)
plt.savefig('../data/plots/Significance_plot.png')
plt.savefig('../data/plots/Significance_plot.pdf')


In [None]:
# Inspect one fast flare: full light curve on top, then a zoom with B_peak_log.
df_lc = get_lc(('0800730701_0_5_0.2_2.0', '1'), df_lc_idx)

# Peak bin of THIS light curve. `max_idx` used to be inherited from an earlier
# cell, where it belonged to a different light curve.
max_idx = int(np.argmax(df_lc['n'].values))

plt.figure(figsize=(15, 1))
plt.step(df_lc['t0'], df_lc['n'])
plt.step(df_lc['t0'], df_lc['mu'], color='red')
# The x-axis is t0, so the marker goes at the t0 value of the peak bin rather
# than at the bin number.
plt.axvline(df_lc['t0'].iloc[max_idx])
plt.show()

n_obs     = df_lc['n'].values
mu_exp    = df_lc['mu'].values
bin_index = range(len(df_lc))

# Seeded uniform jitter in [0, 0.5) added to mu so the plot is reproducible.
# NOTE(review): the jitter is presumably cosmetic — confirm it is wanted.
mu_jitter = 0.5 * np.random.default_rng(0).random(size=len(df_lc))

plt.figure(figsize=(8, 5))
plt.step(bin_index, n_obs, color='black', label='Observed Counts (N)')
plt.step(bin_index, mu_exp + mu_jitter, color='red', label=r'Expected Counts ($\mu$)')
plt.step(bin_index, [B_peak_log(n=n_i, mu=mu_i) for n_i, mu_i in zip(n_obs, mu_exp)], color='green')
plt.xlim(750, 1000)
plt.legend()


In [None]:
# Build the light-curve selection from named criteria; a row is kept only if
# every active criterion holds.
not_excluded = ~df_lc_feat['filt_exclude_obsid']
mu_peak_ok   = df_lc_feat['mu_max'] > 5
mu_mean_ok   = df_lc_feat['mu_mean'] > 1
run_matches  = df_lc_feat['runid'].str.contains('_5_0.2_2.0')

# Further criteria kept for reference (currently switched off):
#   ~df_lc_feat['filt_tbin_5_n_l_5']
#   ~df_lc_feat['n_max_isolated_flare']
#   (df_lc_feat['n_max'] > 10)
#   (df_lc_feat['sigma_max_B_peak'] > 3)
#   (df_lc_feat['sigma_max_B_eclipse'] > 3)
#   (df_lc_feat['n_max'] < 20)
#   (df_lc_feat['n_mean'] < 1.0)
#   (df_lc_feat['n_std'] > 5)
#   (df_lc_feat['n_skew'] > 2)
#   (df_lc_feat['n_kurt'] > 5)
#   (df_lc_feat['n_sum'] < 500)
#   (df_lc_feat['n_min'] > 3)
#   (df_lc_feat['SEP_ARCSEC'] > 20)
mask = not_excluded & mu_peak_ok & mu_mean_ok & run_matches

# Alternative masks tried previously:
# mask = (df_lc_feat['n_max'] > 20) & (df_lc_feat['n_max'] < 100) & (df_lc_feat['n_mean'] < 2) & (df_lc_feat['runid'].str.contains('_5_'))
# mask = df_lc_feat['runid'].str.contains('_5_2.0_12.0') & (df_lc_feat['n_max'] > 5) & (df_lc_feat['n_max'] < 20) #& (df_lc_feat['SEP_ARCSEC'] > 40)
# mask = (df_lc_feat['n_max'] > 20) & (df_lc_feat['n_mean'] < 2) & (df_lc_feat['runid'].str.contains('_200_'))
# mask = (df_lc_feat['n_max'] > 20) & (df_lc_feat['n_mean'] < 2) & (df_lc_feat['runid'].str.contains('0'))

# Selected rows, ordered by increasing n_max.
sub = df_lc_feat[mask].sort_values('n_max')
print(f'{len(sub)} lcs found!')


# For every selected light curve: print its key, crossmatch separation and sky
# position, print an ESASky link, then draw the full curve and a +/-100 bin zoom
# around the peak.
for idx, row in sub.iterrows():
    # The regions table is looked up with the feature row's index label.
    reg_row = df_regions.loc[row.name]
    ra_deg, dec_deg = reg_row['ra_deg'], reg_row['dec_deg']

    print(f"{row['key']} {row['SEP_ARCSEC']} {ra_deg} {dec_deg}")
    # ESASky expects "target=<ra>+<dec>". The link used to pass the crossmatch
    # separation as RA and the RA as Dec, so it pointed at the wrong position.
    print(f'https://sky.esa.int/esasky/?target={ra_deg}+{dec_deg}&hips=DSS2+color&fov=0.2148312729884265&cooframe=J2000&sci=true&lang=en')
    df_lc = get_lc(row['key'], df_lc_idx)

    max_idx = row['n_max_idx']

    # Overview of the full light curve (observed counts and expected counts).
    plt.figure(figsize=(15,1))
    plt.step(df_lc['t0'], df_lc['n'])
    plt.step(df_lc['t0'], df_lc['mu'], color='red')
    # NOTE(review): max_idx is used as a bin number in the zoom below but is
    # drawn here on the t0 axis — confirm both share the same units.
    plt.axvline(max_idx)
    plt.show()

    # Zoom on the bins around the peak.
    plt.figure()
    plt.step(range(len(df_lc)), df_lc['n'].values)
    plt.step(range(len(df_lc)), df_lc['mu'].values)
    plt.xlim(max_idx-100, max_idx+100)
    # Show the zoom now so that it appears directly under its own overview and
    # printout instead of being flushed with the next iteration's figure.
    plt.show()

In [None]:
# Print the crossmatch and region rows stored under a single index label.
# NOTE(review): the XMM crossmatch row is printed twice (first and last) —
# confirm whether the last one was meant to be a different table.
n = 28913
for _frame in (df_cmatch_xmm, df_regions, df_cmatch_xmm):
    print(_frame.loc[n])
    print('=========')

In [None]:
# obsids that occur more than 40 times in the feature table.
pd.set_option('display.max_rows', 100)  # raise or lower to taste
vc = df_lc_feat['obsid'].value_counts()
vc.loc[vc.gt(40)]

In [None]:
# Convert the XMM crossmatch table to a DataFrame and display it.
# NOTE(review): `tables` is not assigned in any cell above this one, so on a
# fresh kernel this line raises NameError — confirm where it should come from.
tables['tab_cmatch_xmm'].to_pandas()

In [None]:
# Keep only the light-curve features flagged by `filt_5sig`.
_region_key = ['runid', 'label']
df_lc_feat_5sig = df_lc_feat.loc[df_lc_feat['filt_5sig']]

# Reindex both tables by (runid, label) so rows can be matched between them.
df_regions_idx      = df_regions.set_index(_region_key)
df_lc_feat_5sig_idx = df_lc_feat_5sig.set_index(_region_key)

# Regions whose (runid, label) appears among the 5-sigma light curves.
df_regions_5sig = df_regions_idx.loc[df_lc_feat_5sig_idx.index]
df_regions_5sig

In [None]:
# `get_unique_regions` is only imported in a commented-out line of the import
# cell, so this call raised NameError on a fresh kernel. Import it here.
# NOTE(review): module path taken from that commented-out import — confirm it
# is still where the function lives.
from exod.post_processing.cluster_regions import get_unique_regions

# Cluster the 5-sigma regions into unique regions using a 20 arcsec radius.
df_unique_reg_5sig = get_unique_regions(df_regions_5sig, clustering_radius=20*u.arcsec)

In [None]:
# Crossmatch the 5-sigma regions with crossmatch_dr14_slim and keep the rows
# whose SEP_ARCSEC exceeds 20.
df_xmm_5sig = crossmatch_dr14_slim(df_regions_5sig)
_sep_above_20 = df_xmm_5sig['SEP_ARCSEC'].gt(20)
df_xmm_5sig_20_arcsec = df_xmm_5sig.loc[_sep_above_20]

In [None]:
def find_row_index(df, element):
    """Return the index label of the first row whose 'idxs' entry contains ``element``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with an 'idxs' column; each entry must support the ``in``
        operator (e.g. a list of member indices).
    element : object
        Value to look for inside the 'idxs' entries.

    Returns
    -------
    object or None
        Index label of the first matching row, in row order, or ``None`` if no
        row contains ``element``.
    """
    # Walk the single column instead of df.iterrows(), which builds a full
    # Series for every row only to read one field from it.
    for row_label, members in df['idxs'].items():
        if element in members:
            return row_label
    return None

In [None]:
# One figure per 5-sigma light curve whose crossmatch SEP_ARCSEC exceeds 20.
# Rows of the 5-sigma feature table are picked positionally via IDX_ORIGINAL.
sub = df_lc_feat_5sig.iloc[df_xmm_5sig_20_arcsec['IDX_ORIGINAL']]

for idx, row in sub.iterrows():
    key   = row['key']
    obsid = row['runid'].split('_')[0]

    # `key` is parsed as the text "('<runid>', '<label>')".
    runid = key.strip(')').strip('(').split(',')[0].strip("'")
    label = int(key.strip(')').strip('(').split(',')[1].strip().strip("'"))

    # Unique region that lists this row's index label among its 'idxs'.
    # (A truncated statement `row['idx` used to sit above this and made the
    # whole cell a SyntaxError; this lookup already supplies the value.)
    unique_reg_id = find_row_index(df_regions_unique, idx)
    reg = df_regions_idx.loc[runid, label]

    # The light curve is read as a row range [start, stop) of the merged HDF5 file.
    start, stop = df_lc_idx.loc[key]
    df_lc       = pd.read_hdf('../data/results_combined/merged_with_dr14/df_lc.h5', start=start, stop=stop)

    print(f'{idx} {obsid} {unique_reg_id} {runid} {label}')

    plt.figure(figsize=(15,3))
    plt.plot(df_lc['time'], df_lc['n'], color='black', label=key)
    plt.plot(df_lc['time'], df_lc['mu'], color='red')
    plt.legend(loc='upper left')
    plt.show()

In [None]:
# Overlay, in one figure, every 5-sigma light curve whose crossmatch SEP_ARCSEC
# exceeds 20, with each curve's time axis shifted to start at zero.
sub = df_lc_feat_5sig.iloc[df_xmm_5sig_20_arcsec['IDX_ORIGINAL']]

plt.figure(figsize=(15,3))
for pos, (idx, row) in enumerate(sub.iterrows()):
    key   = row['key']
    obsid = row['runid'].split('_')[0]
    # NOTE(review): assumes the feature table carries a 'unique_idx' column — confirm.
    unique_idx = row['unique_idx']

    # `key` is parsed as the text "('<runid>', '<label>')".
    runid = key.strip(')').strip('(').split(',')[0].strip("'")
    label = int(key.strip(')').strip('(').split(',')[1].strip().strip("'"))

    # `sub` was built positionally from df_xmm_5sig_20_arcsec, so row `pos` of
    # both frames describes the same source. `xmm` used to be undefined here.
    xmm = df_xmm_5sig_20_arcsec.iloc[pos]

    reg = df_regions_idx.loc[runid, label]
    start, stop = df_lc_idx.loc[key]
    df_lc       = pd.read_hdf('../data/results_combined/merged_with_dr14/df_lc.h5', start=start, stop=stop)

    # Double-quoted f-string: reusing the outer quote inside the braces is a
    # SyntaxError before Python 3.12. WEBPAGE_URL is read with .get() because
    # the column is not known to exist in the slim crossmatch — TODO confirm.
    print(f"{idx} {obsid} {unique_idx} {runid} {label} {xmm['SEP_ARCSEC']} {xmm.get('WEBPAGE_URL', '')} ")

    t0 = df_lc['time'] - df_lc['time'].min()
    plt.plot(t0, df_lc['n'], color='black', label=key)
    #plt.plot(df_lc['time'], df_lc['mu'], color='red')
plt.legend(loc='upper left')
plt.show()

# 5-sigma EXOD sources with no XMM DR14 crossmatch

In [None]:
# Select the regions flagged by `filt_5sig`, crossmatch them with
# crossmatch_dr14_slim, and display the rows whose SEP_ARCSEC exceeds 40.
_is_5sig = df_lc_feat['filt_5sig']
df_regions_5sig = df_regions.loc[_is_5sig]
cmatch_xmm = crossmatch_dr14_slim(df_regions_5sig)
cmatch_xmm.loc[cmatch_xmm['SEP_ARCSEC'].gt(40)]

In [None]:
# Plot every 5-sigma light curve whose crossmatch SEP_ARCSEC exceeds 40.
sub = df_lc_feat[(df_lc_feat['filt_5sig'])]

# Pick the rows positionally through IDX_ORIGINAL, as the earlier 20-arcsec
# cells do. Filtering `sub` with a boolean Series taken from `cmatch_xmm` only
# works if both frames happen to share index labels.
# NOTE(review): assumes IDX_ORIGINAL is the row position in the frame passed to
# crossmatch_dr14_slim, and that df_regions and df_lc_feat are row-aligned — confirm.
_far_from_xmm = cmatch_xmm[cmatch_xmm['SEP_ARCSEC'] > 40]
sub = sub.iloc[_far_from_xmm['IDX_ORIGINAL']]

for idx, row in sub.iterrows():
    key = row['key']
    df_lc = get_lc(key, df_lc_idx)

    # Time axis shifted so that each light curve starts at zero.
    t0 = df_lc['time'] - df_lc['time'].min()
    print(key)
    plt.figure(figsize=(15,3))
    plt.plot(t0, df_lc['n'], color='black', label=key)
    plt.plot(t0, df_lc['mu'], color='red')
    plt.legend(loc='upper left')
    plt.show()