In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from tslearn.clustering import TimeSeriesKMeans
from tslearn.preprocessing import TimeSeriesScalerMeanVariance

In [None]:
from exod.utils.path import savepaths_combined

In [None]:
# Read every combined-results CSV into a dict keyed by the same names as
# savepaths_combined. 'obsid' is forced to string dtype instead of letting
# pandas infer a type for it.
results = {
    name: pd.read_csv(path, dtype={'obsid': 'str'})
    for name, path in savepaths_combined.items()
}

In [None]:
# Mean of the 'B_peak_log' column of the lightcurve table.
# np.nanmean only skips NaN, so a single +/-inf entry would make the result
# inf (or NaN). Infinite values are turned into NaN first, the same way the
# summary-statistics cell treats this column; Series.mean then skips them.
results['lc']['B_peak_log'].replace([np.inf, -np.inf], np.nan).mean()

In [None]:
def _lc_stats(column, finite_only=False):
    """Return (mean, median, std) of one column of results['lc'].

    With finite_only=True, +inf and -inf are replaced by NaN before the
    reductions, so they are skipped like any other missing value.
    """
    values = results['lc'][column]
    if finite_only:
        values = values.replace([np.inf, -np.inf], np.nan)
    return values.mean(), values.median(), values.std()


# Overall counts across the loaded tables.
n_obs       = len(results['run_info']['obsid'].unique())
n_sims      = len(results['run_info'])
n_regions   = len(results['regions'])
n_cells     = results['dc_info']['total_values'].sum()
exposure_s  = results['evt_info']['exposure'].sum()

print(f"Number of Observations     : {n_obs}")
print(f"Number of Simulations      : {n_sims}")
print(f"Number of Detected Regions : {n_regions}")
print(f"Number of Data Cells       : {n_cells:.2e}")
print(f"Total Exposure             : {exposure_s:.2e} s")

# Statistics for the 'n', 'mu', 'B_peak_log' and 'B_eclipse_log' columns of the
# light curve data. Only the two B_*_log columns have infinities removed.
n_mean, n_median, n_std    = _lc_stats('n')
mu_mean, mu_median, mu_std = _lc_stats('mu')
B_peak_log_mean, B_peak_log_median, B_peak_log_std = _lc_stats('B_peak_log', finite_only=True)
B_eclipse_log_mean, B_eclipse_log_median, B_eclipse_log_std = _lc_stats('B_eclipse_log', finite_only=True)

print('')
print('Lightcurve Data:')
print('----------------')
print(f'n         : mean={n_mean:.2f} std={n_std:.2f} median={n_median:.2f}')
print(f'mu        : mean={mu_mean:.2f} std={mu_std:.2f} median={mu_median:.2f}')
print(f'B_peak    : mean={B_peak_log_mean:.2f} std={B_peak_log_std:.2f} median={B_peak_log_median:.2f}')
print(f'B_eclipse : mean={B_eclipse_log_mean:.2f} std={B_eclipse_log_std:.2f} median={B_eclipse_log_median:.2f}')


In [None]:
# Step histograms of the two log Bayes-factor columns. Only values above 0.001
# are kept, and any infinities among them are turned into NaN before binning.
bins = np.linspace(0, 10, 100)

arr1 = results['lc']['B_peak_log']
arr1 = arr1.loc[arr1 > 0.001].replace([np.inf, -np.inf], np.nan)

arr2 = results['lc']['B_eclipse_log']
arr2 = arr2.loc[arr2 > 0.001].replace([np.inf, -np.inf], np.nan)

fig, ax = plt.subplots(figsize=(5,5))
for values, legend in (
    (arr1, r'$\mathrm{log}_{10}(B_{\mathrm{peak}})$'),
    (arr2, r'$\mathrm{log}_{10}(B_{\mathrm{eclipse}})$'),
):
    ax.hist(values, bins=bins, histtype='step', label=legend)
# Vertical marker at 5.94, labelled as the 3 sigma level.
ax.axvline(5.94, color='red', ls='dotted', lw=1.0, label=r'$3 \sigma$ (5.94)')
ax.set_title('Bayes Factor for all lightcurve points')
ax.legend()
plt.show()

In [None]:
# Scatter of 'exposure' against 'mean_rate' from the 'evt_info' table.
x, y = 'exposure', 'mean_rate'
df   = 'evt_info'   # name of the table in `results`, not a DataFrame

fig, ax = plt.subplots(figsize=(5,5))
ax.scatter(results[df][x], results[df][y], s=10, marker='+', color='black')
ax.set_xlabel(x)
ax.set_ylabel(y)
ax.set_title('Exposure vs Mean count Rate')
# tight_layout only accounts for artists that already exist, so it has to run
# after the title and axis labels are set, otherwise the title can be clipped.
fig.tight_layout()
plt.show()

In [None]:
# Scatter of the 'X' and 'Y' columns of the detected-regions table, with both
# axes fixed to the 5000-50000 range.
regions = results['regions']

fig, ax = plt.subplots(figsize=(5,5))
ax.scatter(regions['X'], regions['Y'], marker='+', color='black')
ax.set_title('Position of detected regions in X-Y')
ax.set_xlabel('X')
ax.set_ylabel('Y', rotation=0)   # keep the y label upright
ax.set_xlim(5000,50000)
ax.set_ylim(5000,50000)
fig.tight_layout()
plt.show()

In [None]:
# Scatter of 'bti_frac' against 'bccd_frac' from the 'dc_info' table.
x, y = 'bti_frac', 'bccd_frac'
df = 'dc_info'   # name of the table in `results`, not a DataFrame

fig, ax = plt.subplots(figsize=(5,5))
ax.scatter(results[df][x], results[df][y], s=10, marker='+', color='black')
ax.set_xlabel(x)
ax.set_ylabel(y)
ax.set_title('Fraction of Bad time intervals frames vs Fraction of bad ccd frames')
# tight_layout only accounts for artists that already exist, so it has to run
# after the title and axis labels are set, otherwise the title can be clipped.
fig.tight_layout()
plt.show()

In [None]:
# Number of distinct (runid, label) lightcurves among the selected runs.
# The mask is built inside this cell so it runs on a fresh kernel; previously
# `mask` was only assigned in a later cell.
# NOTE(review): '_5_' mirrors the selection used by the next cell - confirm
# this is the subset that was meant to be counted here.
mask       = results['lc']['runid'].str.contains('_5_')
df_lc      = results['lc'][mask]
df_lc.groupby(['runid', 'label']).ngroups

In [None]:
# Select the lightcurve rows whose runid contains '_5_' and index them by
# (runid, label, i), where i is the running position of each row inside its
# (runid, label) group.
mask       = results['lc']['runid'].str.contains('_5_')
# .copy() detaches the selection from results['lc'], so adding the 'i' column
# below writes to an independent frame and does not trigger
# SettingWithCopyWarning.
df_lc      = results['lc'][mask].copy()
df_lc['i'] = df_lc.groupby(['runid','label']).cumcount()
df_lc      = df_lc.set_index(['runid', 'label', 'i'])
df_lc

In [None]:
# Build the clustering input from the 'n' column.
# assumes df_lc is indexed by (runid, label, i) as set in the preceding cell, so
# unstack() moves the innermost level (i) into columns: one row per lightcurve.
X = df_lc['n'].unstack().values
X = TimeSeriesScalerMeanVariance().fit_transform(X)
# NaN never compares equal to anything, itself included, so `X == np.nan` is
# always False and would leave every NaN in place. np.isnan is the correct test.
X = np.where(np.isnan(X), 0, X)
X

In [None]:
# Dimensions of X, the array handed to the clustering step below.
X.shape

In [None]:
# Fit a 3-cluster time-series k-means on X (fixed random_state so the run is
# repeatable) and keep the cluster index assigned to each row.
km = TimeSeriesKMeans(
    n_clusters=3,
    verbose=True,
    random_state=42,
)
y_pred = km.fit_predict(X)

In [None]:
# Cluster index returned by km.fit_predict for each row of X.
y_pred

In [None]:
import numpy
import matplotlib.pyplot as plt

from tslearn.clustering import TimeSeriesKMeans
from tslearn.datasets import CachedDatasets
from tslearn.preprocessing import TimeSeriesScalerMeanVariance, \
    TimeSeriesResampler

In [None]:
# Author: Romain Tavenard
# License: BSD 3 clause

# k-means on the "Trace" dataset with three metrics (Euclidean, DTW, soft-DTW),
# drawn as a 3x3 grid: one row per metric, one panel per cluster.

seed = 0
numpy.random.seed(seed)
X_train, y_train, X_test, y_test = CachedDatasets().load_dataset("Trace")
X_train = X_train[y_train < 4]  # Keep first 3 classes
numpy.random.shuffle(X_train)
# Keep only 50 time series
X_train = TimeSeriesScalerMeanVariance().fit_transform(X_train[:50])
# Make time series shorter
X_train = TimeSeriesResampler(sz=40).fit_transform(X_train)
sz = X_train.shape[1]

# The three estimators are only configured here; each one is fitted inside the
# loop below, right after its banner is printed.
km = TimeSeriesKMeans(n_clusters=3, verbose=True, random_state=seed)
dba_km = TimeSeriesKMeans(n_clusters=3, n_init=2, metric="dtw", verbose=True,
                          max_iter_barycenter=10, random_state=seed)
sdtw_km = TimeSeriesKMeans(n_clusters=3,
                           metric="softdtw",
                           metric_params={"gamma": .01},
                           verbose=True,
                           random_state=seed)

# (printed banner, estimator, title of the middle panel) - one entry per row.
runs = [
    ("Euclidean k-means", km, "Euclidean $k$-means"),
    ("DBA k-means", dba_km, "DBA $k$-means"),
    ("Soft-DTW k-means", sdtw_km, "Soft-DTW $k$-means"),
]

plt.figure()
for row, (banner, model, title) in enumerate(runs):
    print(banner)
    y_pred = model.fit_predict(X_train)

    for yi in range(3):
        # Panels are numbered row by row: 1-3, 4-6, 7-9.
        plt.subplot(3, 3, 3 * row + yi + 1)
        # Members of the cluster in faint black, its centre in red.
        for xx in X_train[y_pred == yi]:
            plt.plot(xx.ravel(), "k-", alpha=.2)
        plt.plot(model.cluster_centers_[yi].ravel(), "r-")
        plt.xlim(0, sz)
        plt.ylim(-4, 4)
        plt.text(0.55, 0.85,'Cluster %d' % (yi + 1),
                 transform=plt.gca().transAxes)
        if yi == 1:
            plt.title(title)

plt.tight_layout()
plt.show()


In [None]:
# Show df_lc indexed by (runid, label) only.
# When df_lc already carries the (runid, label, i) index built earlier,
# 'runid' and 'label' are index levels rather than columns and set_index would
# raise KeyError; in that case the 'i' level is moved back to a column instead.
(
    df_lc.reset_index(level='i')
    if 'i' in df_lc.index.names
    else df_lc.set_index(['runid', 'label'])
)


In [None]:
# Display the current contents of X (prints the array itself, not a summary).
X

In [None]:
# Select the lightcurve rows of runs whose runid contains '_50_'.
# TODO: get the peaks in the B_peak (not done in this cell yet).
# The former bare `results['lc']` line was dropped: only the last expression
# of a cell is displayed, so it had no effect.
mask = results['lc']['runid'].str.contains('_50_')
df_lc = results['lc'][mask]
df_lc




In [None]:
# Index df_lc by (runid, label) so a single run can be pulled out with .loc.
# NOTE(review): this rebinds df_lc, so running the cell a second time raises
# KeyError ('runid' and 'label' are no longer columns after the first run).
df_lc = df_lc.set_index(['runid', 'label'])

In [None]:
# NOTE(review): `sub` is first assigned inside the plotting loop further down,
# so on a freshly restarted kernel this cell raises NameError.
sub

In [None]:
from sklearn.preprocessing import StandardScaler

In [None]:
# Plot 'n' against 'time' for label 0 of every run in df_lc, drawing all runs
# that belong to the same obsid on one shared figure.
obsid_last = None
for runid in df_lc.index.levels[0]:
    # runid must consist of exactly five '_'-separated fields, otherwise this
    # unpacking raises ValueError.
    obsid, subset, t_bin, E_lo, E_hi = runid.split('_')

    # Select the run first, then keep only its label-0 rows. (The label used
    # to be read from `sub` before `sub` was assigned for this iteration.)
    sub = df_lc.loc[runid]
    sub = sub[sub.index == 0]
    if sub.empty:
        continue   # no label-0 rows for this run, nothing to draw
    label = sub.index[0]

    # Open the figure before plotting: when the obsid changes, finish the
    # previous figure and start a new one for the new observation.
    if obsid != obsid_last:
        if obsid_last is not None:
            plt.legend()
            plt.show()
        plt.figure(figsize=(15,5))
        obsid_last = obsid

    plt.plot(sub['time'], sub['n'], label=f'{obsid} ({subset}) src={label} E={E_lo}-{E_hi}')

# Finish the figure of the last observation, if anything was plotted at all.
if obsid_last is not None:
    plt.legend()
    plt.show()

In [None]:
# 'B_peak_log' values of `sub` as left behind by the last iteration of the
# plotting loop above.
sub['B_peak_log']

In [None]:
# Display `sub` as left behind by the last iteration of the plotting loop.
sub