In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from exod.utils.path import savepaths_combined
from scipy.stats import ttest_rel

from exod.post_processing.util import get_lc

In [None]:
# Load the combined result tables; the file locations come from `savepaths_combined`.
df_evt, df_dc, df_lc_feat, df_regions = (
    pd.read_csv(savepaths_combined[table_key])
    for table_key in ('evt_info', 'dc_info', 'lc_features', 'regions')
)

# The light-curve index table is read with its 'Unnamed: 0' column as the index
# (presumably the index written out by an earlier `to_csv` -- TODO confirm).
df_lc_idx = pd.read_csv(savepaths_combined['lc_idx'], index_col='Unnamed: 0')

In [None]:
def filt_lc_high_start(df_lc):
    """
    Placeholder filter for light curves with a high start.

    The filter is not implemented yet: `df_lc` is never inspected and the
    function unconditionally returns False.

    Parameters:
    - df_lc: Light-curve DataFrame (currently unused).

    Returns:
    - Always False.
    """
    return False

In [None]:
# Fetch a single example light curve through `get_lc`, looked up by its key string
# in the light-curve index table, then display it as the cell output.
df_lc = get_lc(
    key="('0911990501_0_5_0.2_2.0', '1')",
    df_lc_idx=df_lc_idx,
)
df_lc

In [None]:
# Overlay the 'n' (black) and 'mu' (red) columns of the example light curve against 't0'.
# Each curve carries a legend label and the x-axis is named so the figure can be
# read on its own when the notebook is skimmed.
plt.figure(figsize=(10, 3))
plt.plot(df_lc['t0'], df_lc['n'], color='black', label='n')
plt.plot(df_lc['t0'], df_lc['mu'], color='red', label='mu')
plt.xlabel('t0')
plt.legend();  # trailing ';' suppresses the repr of the returned Legend object

In [None]:
def is_mu_significantly_higher(df_lc, time_col='t0', series1='n', series2='mu', time_limit=5000, alpha=0.05):
    """
    Determines whether `series2` is significantly higher than `series1` in the
    early part of a light curve.

    Rows with `df_lc[time_col] <= time_limit` are selected and a paired t-test
    (`scipy.stats.ttest_rel`, default two-sided) is run on the two columns.
    The result is True only when the p-value is below `alpha` AND the mean of
    `series2` exceeds the mean of `series1`.

    Notes:
    - `time_limit` is compared directly against the values in `time_col`; no
      start time is subtracted. It therefore only means "the first
      `time_limit` seconds" if that column starts at zero.
    - Because a two-sided p-value is combined with a direction check, the
      criterion is equivalent to a one-sided test at level `alpha / 2`.
    - If fewer than two rows fall inside the window the test is undefined and
      the function returns False without calling the t-test.

    Parameters:
    - df_lc: DataFrame containing the time series data.
    - time_col: The name of the time column (default is 't0').
    - series1: The name of the reference series (default is 'n').
    - series2: The name of the series tested for being higher (default is 'mu').
    - time_limit: Upper bound (inclusive) on `time_col` for the rows used (default is 5000).
    - alpha: Significance level applied to the two-sided p-value (default is 0.05).

    Returns:
    - A bool: True if `series2` is significantly higher than `series1`, else False.
    """
    # Keep only the rows inside the requested time window.
    df_filtered = df_lc[df_lc[time_col] <= time_limit]

    # A paired t-test needs at least two pairs; with fewer it only yields NaN
    # statistics (plus runtime warnings), so bail out early.
    if len(df_filtered) < 2:
        return False

    n_values = df_filtered[series1]
    mu_values = df_filtered[series2]

    # Only the p-value is needed; the direction is taken from the means below.
    p_value = ttest_rel(mu_values, n_values).pvalue

    # A NaN p-value (e.g. NaNs in the data or identical series) compares False here.
    return bool(p_value < alpha and mu_values.mean() > n_values.mean())

In [None]:
# Apply the check to the example light curve using the function's default settings.
is_mu_significantly_higher(df_lc=df_lc)