# Cálculo FAI


## Packages

In [1]:
from datetime import datetime, timedelta
import pandas as pd
import os
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.ticker import FuncFormatter
from mpl_toolkits.mplot3d import Axes3D


## Funciones

### Función que llama las funciones para calcular FAI

In [28]:

def fai_from_df(df_full: pd.DataFrame,
                fai_temp_range: tuple = (7, 14),
                fai_em_threshold: float = 0.005,
                date_column: str = "date",
                duration: bool = True,
                FAI_duration: int = 3,
                filter_flare_coincidence: bool = True,
                df_flares: pd.DataFrame = None,
                flare_peak_col: str = "peak_time",
                flare_end_col: str = "end_time",
                verbose: bool = True,
            ) -> dict:
    """
    Flag FAI candidates in a time series and derive the "true" and
    flare-filtered subsets.

    Parameters
    ----------
    df_full : pd.DataFrame
        Must contain the columns 'T_cor', 'EM_cor_norm' and `date_column`.
        The input is copied, never modified.
        NOTE(review): rows are not sorted here; the run detection below
        assumes they already are in chronological order -- confirm upstream.
    fai_temp_range : tuple, optional
        Inclusive (low, high) range of coronal temperature (in MK) for the
        FAI criterion. Default (7, 14).
    fai_em_threshold : float, optional
        'EM_cor_norm' must be strictly greater than this value. Default 0.005.
    date_column : str, optional
        Name of the timestamp column. Default "date".
    duration : bool, optional
        If True (default), also require a minimum duration of the alert.
    FAI_duration : int, optional
        Minimum duration of an uninterrupted alert run. A row becomes
        FAI_true once at least ``FAI_duration - 1`` minutes have elapsed since
        the first row of its run. Only used when `duration` is True and the
        value is greater than 1. Default 3.
    filter_flare_coincidence : bool, optional
        If True (default), also build a subset without the FAI rows that fall
        between the peak and the end of any flare (both ends inclusive).
    df_flares : pd.DataFrame, optional
        Flare table; required when `filter_flare_coincidence` is True.
    flare_peak_col, flare_end_col : str, optional
        Column names of flare peak / end time in `df_flares`.
        Defaults "peak_time" / "end_time".
    verbose : bool, optional
        If True (default), print a short summary of the counts.

    Returns
    -------
    dict
        {
            "df_fai_all": rows where FAI_alert is True,
            "df_fai_true": rows where FAI_true is True,
            "df_fai_filtered": df_fai_true minus rows inside a flare's
                               peak-to-end interval (equal to df_fai_true
                               when the flare filter is disabled)
        }

    Raises
    ------
    ValueError
        If a required column is missing, or if the flare filter is requested
        without `df_flares`.
    """
    # Fail early with a clear message instead of a KeyError further down.
    for col in ("T_cor", "EM_cor_norm", date_column):
        if col not in df_full.columns:
            raise ValueError(f"Falta la columna requerida: '{col}'")

    if filter_flare_coincidence:
        if df_flares is None:
            raise ValueError("Cuando filter_flare_coincidence=True, debe proporcionar df_flares")
        for col in (flare_peak_col, flare_end_col):
            if col not in df_flares.columns:
                raise ValueError(f"Falta la columna requerida en df_flares: '{col}'")

    # Work on copies; unparseable timestamps become NaT instead of raising.
    df_full = df_full.copy()
    df_full[date_column] = pd.to_datetime(df_full[date_column], errors="coerce")

    if filter_flare_coincidence:
        df_flares = df_flares.copy()
        df_flares[flare_peak_col] = pd.to_datetime(df_flares[flare_peak_col], errors="coerce")
        df_flares[flare_end_col] = pd.to_datetime(df_flares[flare_end_col], errors="coerce")

    # Step 1: candidate flag -- temperature inside the range AND EM above threshold.
    df_full["FAI_alert"] = (df_full["T_cor"].between(*fai_temp_range) &
                            (df_full["EM_cor_norm"] > fai_em_threshold))

    # Without a duration criterion every candidate counts as a true FAI.
    # (Previously the column stayed all-False in that case, which made
    # df_fai_true and df_fai_filtered unconditionally empty.)
    df_full["FAI_true"] = df_full["FAI_alert"]

    if duration and FAI_duration > 1:
        # Step 2: minutes between consecutive rows. The first row has no
        # predecessor; it is filled with 1 so it never counts as a gap.
        df_full["delta_min"] = df_full[date_column].diff().dt.total_seconds().div(60).fillna(1)

        # Step 3: a new run starts whenever the alert flag flips or more than
        # one minute separates a row from the previous one.
        df_full["group_id"] = ((df_full["FAI_alert"] != df_full["FAI_alert"].shift()) |
                               (df_full["delta_min"] > 1)).cumsum()

        # Step 4: minutes elapsed since the first row of each run.
        df_full["duration_from_start"] = (
            df_full.groupby("group_id")[date_column]
                   .transform(lambda x: (x - x.iloc[0]).dt.total_seconds() / 60)
        )

        # Step 5: the alert must be active and must have lasted long enough.
        df_full["FAI_true"] = df_full["FAI_alert"] & (df_full["duration_from_start"] >= (FAI_duration - 1))

    df_fai_all = df_full[df_full["FAI_alert"]].copy()
    df_fai_true = df_full[df_full["FAI_true"]].copy()

    # Step 6: drop true FAIs lying in any flare's [peak, end] interval.
    df_fai_filtered = df_fai_true.copy()
    if filter_flare_coincidence and len(df_fai_true) > 0:
        fai_dates = df_fai_true[date_column]
        keep = pd.Series(True, index=df_fai_true.index)

        for peak_time, end_time in zip(df_flares[flare_peak_col], df_flares[flare_end_col]):
            # Flares with an unparseable boundary cannot define an interval.
            if pd.isna(peak_time) or pd.isna(end_time):
                continue
            keep &= ~fai_dates.between(peak_time, end_time)

        df_fai_filtered = df_fai_true.loc[keep].copy()

    if verbose:
        print(f"‚úÖ Se encontraron {len(df_fai_all)} puntos candidatos a FAI (T_cor {fai_temp_range}, EM_cor_norm > {fai_em_threshold})")
        if duration and FAI_duration > 1:
            print(f"‚úÖ Se encontraron {len(df_fai_true)} FAI verdaderos con duraci√≥n m√≠nima de {FAI_duration} minutos consecutivos.")
        if filter_flare_coincidence:
            filtered_count = len(df_fai_true) - len(df_fai_filtered)
            print(f"‚úÖ Se filtraron {filtered_count} FAI que coincid√≠an con flares, quedan {len(df_fai_filtered)} FAI filtrados.\n")

    return {
        "df_fai_all": df_fai_all,
        "df_fai_true": df_fai_true,
        "df_fai_filtered": df_fai_filtered
    }


### Función para calcular la anticipación del FAI

In [46]:
######################################
# 8. Tiempo de Anticipacion de flare #
######################################

def anticipation_fai_analysis_v2(df_fai_selected, df_flare_data,
                                start_col="StartTime",
                                peak_col="PeakTime",
                                end_col="EndTime",
                                window_minutes=30,
                                max_prev_flare_minutes=180,
                                fai_date_col="date"):
    """
    Count the FAIs around every flare and compute anticipation times.

    For each flare (processed in order of peak time) the search window is
    ``[max(previous flare end, peak - window_minutes), end]``, both ends
    inclusive. A summary is printed to stdout.

    Parameters
    ----------
    df_fai_selected : pd.DataFrame
        FAI table; its timestamps are read from `fai_date_col`.
    df_flare_data : pd.DataFrame
        Flare table with the columns named by `start_col`, `peak_col` and
        `end_col`. The input is copied, never modified.
    start_col, peak_col, end_col : str
        Names of the flare start / peak / end columns.
    window_minutes : number
        Minutes before the flare peak in which FAIs are searched.
    max_prev_flare_minutes : number
        Times since the previous flare larger than this are reported as NaN.
    fai_date_col : str
        Name of the timestamp column of `df_fai_selected`. Default "date".

    Returns
    -------
    pd.DataFrame
        Copy of `df_flare_data`, sorted by `peak_col` with a fresh index, plus:
        FAIalerts_W, FAIalerts_WStart, FAIalerts_WPeak, FAIalerts_StartPeak,
        FAIalerts_PeakEnd, FAIalerts_startEnd (FAI counts per sub-window),
        AnticipationStart, AnticipationPeak (minutes from the earliest FAI in
        the window to start / peak; NaN when that FAI is not earlier),
        Peak_to_lastFAI (minutes from the last FAI to the peak, may be
        negative), Time_since_prev_flare_end, Time_since_prev_flare_peak
        (minutes from the previous flare's end / peak to this peak).
        If the flare table has a 'StartPeak' column, RelAnticipation_Peak and
        RelAnticipation_Start (anticipation divided by 'StartPeak') are added.
    """
    df = df_flare_data.copy()
    for col in (start_col, peak_col, end_col):
        df[col] = pd.to_datetime(df[col])
    fai_times = pd.to_datetime(df_fai_selected[fai_date_col]).sort_values()

    # Chronological order is required for the "previous flare" logic below.
    df = df.sort_values(peak_col).reset_index(drop=True)

    window = pd.Timedelta(minutes=window_minutes)

    results = {
        "FAIalerts_W": [],
        "FAIalerts_WStart": [],
        "FAIalerts_WPeak": [],
        "FAIalerts_StartPeak": [],
        "FAIalerts_PeakEnd": [],
        "FAIalerts_startEnd": [],
        "AnticipationStart": [],
        "AnticipationPeak": [],
        "Peak_to_lastFAI": [],
        "Time_since_prev_flare_end": [],
        "Time_since_prev_flare_peak": [],
    }

    # End / peak of the previously processed flare (None for the first one).
    prev_end_t = None
    prev_peak_t = None

    for _, row in df.iterrows():
        start_t = row[start_col]
        peak_t = row[peak_col]
        end_t = row[end_col]

        # Lower bound: never reach back past the end of the previous flare.
        lower_limit = peak_t - window
        if prev_end_t is not None:
            lower_limit = max(prev_end_t, lower_limit)

        valid_fais = fai_times[(fai_times >= lower_limit) & (fai_times <= end_t)]

        # FAI counts per sub-window (valid_fais is already bounded by end_t).
        results["FAIalerts_W"].append(len(valid_fais))
        results["FAIalerts_WStart"].append(int((valid_fais < start_t).sum()))
        results["FAIalerts_WPeak"].append(int((valid_fais < peak_t).sum()))
        results["FAIalerts_StartPeak"].append(
            int(((valid_fais >= start_t) & (valid_fais < peak_t)).sum()))
        results["FAIalerts_PeakEnd"].append(
            int(((valid_fais >= peak_t) & (valid_fais <= end_t)).sum()))
        results["FAIalerts_startEnd"].append(
            int(((valid_fais >= start_t) & (valid_fais <= end_t)).sum()))

        # NaN (not None) as the "no value" marker keeps these columns float
        # even when no flare has any FAI.
        anticipation_start = anticipation_peak = peak_to_last_fai = np.nan
        if not valid_fais.empty:
            earliest_fai = valid_fais.min()
            last_fai = valid_fais.max()
            if earliest_fai < start_t:
                anticipation_start = (start_t - earliest_fai).total_seconds() / 60.0
            if earliest_fai < peak_t:
                anticipation_peak = (peak_t - earliest_fai).total_seconds() / 60.0
            peak_to_last_fai = (peak_t - last_fai).total_seconds() / 60.0  # may be negative

        results["AnticipationStart"].append(anticipation_start)
        results["AnticipationPeak"].append(anticipation_peak)
        results["Peak_to_lastFAI"].append(peak_to_last_fai)

        # Time elapsed since the previous flare, kept only within the range.
        time_since_prev_end = time_since_prev_peak = np.nan
        if prev_end_t is not None:
            delta_end = (peak_t - prev_end_t).total_seconds() / 60.0
            delta_peak = (peak_t - prev_peak_t).total_seconds() / 60.0
            if delta_end <= max_prev_flare_minutes:
                time_since_prev_end = delta_end
            if delta_peak <= max_prev_flare_minutes:
                time_since_prev_peak = delta_peak

        results["Time_since_prev_flare_end"].append(time_since_prev_end)
        results["Time_since_prev_flare_peak"].append(time_since_prev_peak)

        prev_end_t = end_t
        prev_peak_t = peak_t

    for col, values in results.items():
        df[col] = values

    # ======================================================
    # Global statistics
    # ======================================================
    total_fais = len(df_fai_selected)
    total_flares = len(df)
    total_fais_in_windows = df["FAIalerts_W"].sum()

    def pct_of_flares(count):
        return 100 * count / total_flares if total_flares > 0 else 0

    def pct(x):
        return 100 * x / total_fais if total_fais > 0 else 0

    flares_with_fais = (df["FAIalerts_W"] > 0).sum()
    flares_without_fais = total_flares - flares_with_fais
    flares_with_fai_before_start = (df["FAIalerts_WStart"] > 0).sum()
    flares_without_fai_before_start = total_flares - flares_with_fai_before_start
    flares_with_fai_before_peak = (df["FAIalerts_WPeak"] > 0).sum()
    flares_without_fai_before_peak = total_flares - flares_with_fai_before_peak

    pct_flares_with_fais = pct_of_flares(flares_with_fais)
    pct_flares_without_fais = pct_of_flares(flares_without_fais)
    pct_flares_with_fai_before_start = pct_of_flares(flares_with_fai_before_start)
    pct_flares_without_fai_before_start = pct_of_flares(flares_without_fai_before_start)
    pct_flares_with_fai_before_peak = pct_of_flares(flares_with_fai_before_peak)
    pct_flares_without_fai_before_peak = pct_of_flares(flares_without_fai_before_peak)

    # Totals per sub-window
    fai_WStart = df["FAIalerts_WStart"].sum()
    fai_WPeak = df["FAIalerts_WPeak"].sum()
    fai_StartPeak = df["FAIalerts_StartPeak"].sum()
    fai_PeakEnd = df["FAIalerts_PeakEnd"].sum()
    fai_StartEnd = df["FAIalerts_startEnd"].sum()

    mean_fais_per_flare = df["FAIalerts_W"].mean()
    anticipations = df["AnticipationPeak"].dropna()
    mean_anticipation = anticipations.mean() if not anticipations.empty else np.nan
    min_anticipation = anticipations.min() if not anticipations.empty else np.nan
    max_anticipation = anticipations.max() if not anticipations.empty else np.nan

    print("\n" + "="*70)
    print("üìä RESUMEN DE AN√ÅLISIS FAI‚ÄìFLARE")
    print("="*70)
    print(f"üîπ Total de FAIs analizados: {total_fais}")
    print(f"üîπ Total de flares analizados: {total_flares}")
    print(f"üîπ FAIs encontrados dentro de ventanas: {total_fais_in_windows} ({pct(total_fais_in_windows):.1f}%)")
    print(f"üîπ Promedio de FAIs por flare: {mean_fais_per_flare:.2f}")
    print("------------------------------------------------------")
    print("üìà ESTAD√çSTICAS DE ASOCIACI√ìN:")
    print(f"   ‚Ä¢ Flares con ‚â•1 FAI (cualquier ventana): {flares_with_fais} ({pct_flares_with_fais:.1f}%)")
    print(f"   ‚Ä¢ Flares SIN FAI (ninguna ventana): {flares_without_fais} ({pct_flares_without_fais:.1f}%)")
    print(f"   ‚Ä¢ Flares con FAI ANTES del inicio: {flares_with_fai_before_start} ({pct_flares_with_fai_before_start:.1f}%)")
    print(f"   ‚Ä¢ Flares SIN FAI antes del inicio: {flares_without_fai_before_start} ({pct_flares_without_fai_before_start:.1f}%)")
    print(f"   ‚Ä¢ Flares con FAI ANTES del pico: {flares_with_fai_before_peak} ({pct_flares_with_fai_before_peak:.1f}%)")
    print(f"   ‚Ä¢ Flares SIN FAI antes del pico: {flares_without_fai_before_peak} ({pct_flares_without_fai_before_peak:.1f}%)")
    print("------------------------------------------------------")
    print("üìç DISTRIBUCI√ìN DE FAIs POR VENTANA:")
    print(f"   ‚Ä¢ Antes del inicio (WStart): {fai_WStart} ({pct(fai_WStart):.1f}%)")
    print(f"   ‚Ä¢ Antes del pico (WPeak):    {fai_WPeak} ({pct(fai_WPeak):.1f}%)")
    print(f"   ‚Ä¢ Entre inicio(inclusive) y pico:          {fai_StartPeak} ({pct(fai_StartPeak):.1f}%)")
    print(f"   ‚Ä¢ Entre pico(inclusive) y fin(inclusive):  {fai_PeakEnd} ({pct(fai_PeakEnd):.1f}%)")
    print(f"   ‚Ä¢ Entre inicio(inclusive) y fin(inclusive):{fai_StartEnd} ({pct(fai_StartEnd):.1f}%)")
    print("------------------------------------------------------")
    if not anticipations.empty:
        print(f"üî∏ Anticipaci√≥n media (respecto al pico): {mean_anticipation:.1f} min")
        print(f"üî∏ Anticipaci√≥n m√≠nima: {min_anticipation:.1f} min")
        print(f"üî∏ Anticipaci√≥n m√°xima: {max_anticipation:.1f} min")
    else:
        print("‚ö†Ô∏è No se encontraron FAIs previos para calcular anticipaciones.")
    print("="*70 + "\n")

    # ======================================================
    # Relative times (anticipation divided by the 'StartPeak' column)
    # ======================================================
    if "StartPeak" in df.columns:
        df["RelAnticipation_Peak"] = df["AnticipationPeak"] / df["StartPeak"]
        df["RelAnticipation_Start"] = df["AnticipationStart"] / df["StartPeak"]
        print("‚úÖ Columnas 'RelAnticipation_Peak' y 'RelAnticipation_Start' a√±adidas (Œît / StartPeak).")
    else:
        print("‚ö†Ô∏è No se encontr√≥ la columna 'StartPeak'. No se calcularon tiempos relativos.")

    return df




### Función para buscar todos los FAI asociados a flares

In [47]:
#
def associate_fai_to_flare_dataframes(df_fai_selected, df_flares, 
                                      window_minutes=30, include_inside=True):
    """
    Associate every FAI with the first relevant flare, tried in this order:

    1. The flare with the earliest PeakTime in [FAI, FAI + window].
    2. Otherwise, the flare with the earliest EndTime in [FAI, FAI + window].
    3. (Optional) Otherwise, the earliest-starting flare that is active at
       the FAI time (StartTime <= FAI <= EndTime).

    The time differences between the FAI and the flare start, peak and end
    are stored in minutes. Progress and a summary are printed to stdout.

    Parameters
    ----------
    df_fai_selected : pd.DataFrame
        FAI alerts; the timestamp is read from column 'date' or, failing
        that, 'Unnamed: 0'.
    df_flares : pd.DataFrame
        Flare table. Reads StartTime, PeakTime, EndTime, Flare_ID, Class,
        ClassLetter, ClassNumber, ClassGroup, StartPeak, PeakEnd, StartEnd
        and, if present, Observatory.
    window_minutes : number
        Forward search window, in minutes, starting at the FAI.
    include_inside : bool
        If True, rule 3 above is applied as a last resort.

    Returns
    -------
    pd.DataFrame
        Copy of `df_fai_selected` with 'Time_FAI', the boolean
        'Associated_Flare', the copied flare fields ('Flare_ID', 'F_*'),
        'Association_Type' ("PeakTime", "EndTime" or "Inside"),
        'Time_to_flare', 'FAI_to_start', 'FAI_to_peak' and 'FAI_to_end'.
        Rows without an associated flare keep None in those columns.

    Raises
    ------
    ValueError
        If no timestamp column is found in `df_fai_selected`.
    """
    df_fai = df_fai_selected.copy()
    df_flares_copy = df_flares.copy()

    # Detect the FAI timestamp column.
    time_col_fai = next((col for col in ('date', 'Unnamed: 0') if col in df_fai.columns), None)
    if time_col_fai is None:
        raise ValueError("No se pudo identificar la columna de tiempo en df_fai_selected")

    print(f"Usando columna de tiempo FAI: {time_col_fai}")

    df_fai['Time_FAI'] = pd.to_datetime(df_fai[time_col_fai])
    for c in ['StartTime', 'PeakTime', 'EndTime']:
        df_flares_copy[c] = pd.to_datetime(df_flares_copy[c])

    # Output columns start as None (object dtype) so they can hold mixed values.
    for col in ['Associated_Flare', 'Flare_ID', 'F_StartTime', 'F_PeakTime', 'F_EndTime',
                'F_Class', 'F_ClassLetter', 'F_ClassNumber', 'F_ClassGroup', 'F_Observatory', 
                'F_StartPeak', 'F_PeakEnd', 'F_StartEnd', 'Association_Type',
                'Time_to_flare', 'FAI_to_start', 'FAI_to_peak', 'FAI_to_end']:
        df_fai[col] = None
    df_fai['Associated_Flare'] = False

    # (output column, flare column) pairs copied verbatim from the flare.
    copied_fields = (
        ('Flare_ID', 'Flare_ID'),
        ('F_StartTime', 'StartTime'),
        ('F_PeakTime', 'PeakTime'),
        ('F_EndTime', 'EndTime'),
        ('F_Class', 'Class'),
        ('F_ClassLetter', 'ClassLetter'),
        ('F_ClassNumber', 'ClassNumber'),
        ('F_ClassGroup', 'ClassGroup'),
        ('F_StartPeak', 'StartPeak'),
        ('F_PeakEnd', 'PeakEnd'),
        ('F_StartEnd', 'StartEnd'),
    )

    window = pd.Timedelta(minutes=window_minutes)
    associated_count = 0

    print(f"Procesando {len(df_fai)} alertas FAI...")

    flare_start = df_flares_copy['StartTime']
    flare_peak = df_flares_copy['PeakTime']
    flare_end = df_flares_copy['EndTime']

    for idx, row in df_fai.iterrows():
        fai_time = row['Time_FAI']
        window_end = fai_time + window
        flare = None
        ref_type = None

        # Rule 1: a flare peaking within the forward window.
        candidate_peak = df_flares_copy[(flare_peak >= fai_time) & (flare_peak <= window_end)]
        if not candidate_peak.empty:
            flare = candidate_peak.sort_values('PeakTime').iloc[0]
            ref_type = "PeakTime"
        else:
            # Rule 2: a flare ending within the forward window.
            candidate_end = df_flares_copy[(flare_end >= fai_time) & (flare_end <= window_end)]
            if not candidate_end.empty:
                flare = candidate_end.sort_values('EndTime').iloc[0]
                ref_type = "EndTime"
            elif include_inside:
                # Rule 3: a flare that is active at the FAI time.
                candidate_inside = df_flares_copy[(flare_start <= fai_time) & (flare_end >= fai_time)]
                if not candidate_inside.empty:
                    flare = candidate_inside.sort_values('StartTime').iloc[0]
                    ref_type = "Inside"

        if flare is None:
            continue

        df_fai.at[idx, 'Associated_Flare'] = True
        for target, source in copied_fields:
            df_fai.at[idx, target] = flare[source]
        # Observatory is the only optional flare column.
        df_fai.at[idx, 'F_Observatory'] = flare.get('Observatory', None)
        df_fai.at[idx, 'Association_Type'] = ref_type

        # Relative times in minutes (positive when the flare event is later).
        to_start = (flare['StartTime'] - fai_time).total_seconds() / 60
        to_peak = (flare['PeakTime'] - fai_time).total_seconds() / 60
        to_end = (flare['EndTime'] - fai_time).total_seconds() / 60
        df_fai.at[idx, 'FAI_to_start'] = to_start
        df_fai.at[idx, 'FAI_to_peak'] = to_peak
        df_fai.at[idx, 'FAI_to_end'] = to_end

        if ref_type == "PeakTime":
            df_fai.at[idx, 'Time_to_flare'] = to_peak
        elif ref_type == "EndTime":
            df_fai.at[idx, 'Time_to_flare'] = to_end
        else:  # Inside
            df_fai.at[idx, 'Time_to_flare'] = 0

        associated_count += 1

    # --- Summary ---
    total_fai = len(df_fai)
    # An empty FAI table must not crash the summary with a division by zero.
    pct = (associated_count / total_fai) * 100 if total_fai > 0 else 0.0

    print(f"\n--- Estad√≠sticas de Asociaci√≥n FAI-Flare ---")
    print(f"Total FAIs: {total_fai}")
    print(f"FAIs asociados: {associated_count} ({pct:.1f}%)")
    print(f"Ventana: {window_minutes} minutos hacia adelante")
    print(f"Incluir FAIs dentro de flares activos: {include_inside}")

    if associated_count > 0:
        valid_times = df_fai['Time_to_flare'].dropna()
        print(f"Tiempo medio a flare: {valid_times.mean():.1f} min")
        print(f"Tiempo m√≠nimo: {valid_times.min():.1f} min")
        print(f"Tiempo m√°ximo: {valid_times.max():.1f} min")

        class_dist = df_fai[df_fai['Associated_Flare']]['F_Class'].value_counts()
        print("\nDistribuci√≥n por clase:")
        for cls, n in class_dist.items():
            print(f"  {cls}: {n}")

    return df_fai




## Valid Data:

In [2]:
n = 185
# Analysis date in YYYY-MM-DD format. It is pinned to an existing run; switch to
# datetime.now().strftime("%Y-%m-%d") to target today's output folder instead.
fecha_actual = "2025-11-04"
window_minutes = 30
output_dir = f"{fecha_actual}_Analysis_for_{n}_days"

# CSV files of the cleaned data and of the valid flares, both located in the
# run's output folder.
csv_path_full = f"{output_dir}/df_full_{n}_valid.csv"
csv_path_flares = f"{output_dir}/df_flares_{n}_valid.csv"

df_full_valid, df_flares_valid = (
    pd.read_csv(csv_path) for csv_path in (csv_path_full, csv_path_flares)
)

In [3]:
# The CSV stores the flare times as text; parse the three time columns at once.
df_flares_valid[["StartTime", "PeakTime", "EndTime"]] = (
    df_flares_valid[["StartTime", "PeakTime", "EndTime"]].apply(pd.to_datetime)
)



## prueba para fai_from_df

In [4]:
# Parameters for the manual, step-by-step FAI computation in the next cell.
# They mirror the defaults of fai_from_df.
# Inclusive (low, high) range for T_cor.
fai_temp_range= (7, 14)
# EM_cor_norm must be strictly greater than this value.
fai_em_threshold= 0.005
# Name of the timestamp column of the data table.
date_column = "date"
# Whether the minimum-duration criterion is applied.
duration = True
# Minimum duration of an alert run; compared as (FAI_duration - 1) minutes.
FAI_duration = 3

In [None]:

# Manual, step-by-step version of the FAI computation, kept to inspect the
# intermediate columns. All thresholds come from the parameter cell above
# (fai_temp_range, fai_em_threshold, date_column, FAI_duration); nothing is
# re-assigned here, so changing the parameter cell actually takes effect.
df_full = df_full_valid.copy()

# Candidate flag: True where the temperature and emission-measure criteria hold.
df_full["FAI_alert"] = (df_full["T_cor"].between(*fai_temp_range) &
                        (df_full["EM_cor_norm"] > fai_em_threshold))

df_full[date_column] = pd.to_datetime(df_full[date_column], errors="coerce")

# Start with no confirmed FAI; the column is overwritten at the end.
df_full["FAI_true"] = False

# Minutes between consecutive rows (the first row is filled with 1 so it is
# never treated as a gap).
df_full["delta_min"] = df_full[date_column].diff().dt.total_seconds().div(60).fillna(1)

# A new group starts when the alert flag flips or a gap longer than one minute appears.
df_full["group_id"] = ((df_full["FAI_alert"] != df_full["FAI_alert"].shift()) |
                       (df_full["delta_min"] > 1)).cumsum()

# Minutes elapsed since the first row of each group.
df_full["duration_from_start"] = (
    df_full.groupby("group_id")[date_column]
           .transform(lambda x: (x - x.iloc[0]).dt.total_seconds() / 60)
)

# Confirmed FAI: alert active and group running for at least FAI_duration - 1 minutes.
df_full["FAI_true"] = df_full["FAI_alert"] & (df_full["duration_from_start"] >= (FAI_duration - 1))


In [25]:
# First three rows, restricted to the columns involved in the FAI computation.
df_full.loc[:, ["GOES_ID", "date", "T_cor", "EM_cor_norm", "FAI_alert",
                "duration_from_start", "delta_min", "group_id", "FAI_true"]].head(3)


Unnamed: 0,GOES_ID,date,T_cor,EM_cor_norm,FAI_alert,duration_from_start,delta_min,group_id,FAI_true
0,GOES_0,2000-06-06 00:05:00,12.571802,0.00184,False,0.0,1.0,1,False
1,GOES_1,2000-06-06 00:06:00,12.297073,0.002749,False,1.0,1.0,1,False
2,GOES_2,2000-06-06 00:07:00,12.875831,0.003267,False,2.0,1.0,1,False


In [15]:
# Inspect the rows in a 20-minute window around midnight (both ends inclusive).
mask = df_full["date"].between("2000-06-06 23:50:00", "2000-06-07 00:10:00")
df_filtered = df_full.loc[
    mask,
    ["GOES_ID", "date", "T_cor", "EM_cor_norm", "FAI_alert", "delta_min", "group_id", "FAI_true"],
]
df_filtered


Unnamed: 0,GOES_ID,date,T_cor,EM_cor_norm,FAI_alert,delta_min,group_id,FAI_true


### Info data valid

In [50]:
# Column names, non-null counts and dtypes of the flare table.
df_flares_valid.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2020 entries, 0 to 2019
Data columns (total 12 columns):
 #   Column       Non-Null Count  Dtype         
---  ------       --------------  -----         
 0   Flare_ID     2020 non-null   object        
 1   StartTime    2020 non-null   datetime64[ns]
 2   PeakTime     2020 non-null   datetime64[ns]
 3   EndTime      2020 non-null   datetime64[ns]
 4   Class        2020 non-null   object        
 5   ClassLetter  2020 non-null   object        
 6   ClassNumber  2020 non-null   float64       
 7   ClassGroup   2020 non-null   object        
 8   Observatory  2020 non-null   object        
 9   StartPeak    2020 non-null   float64       
 10  PeakEnd      2020 non-null   float64       
 11  StartEnd     2020 non-null   float64       
dtypes: datetime64[ns](3), float64(4), object(5)
memory usage: 189.5+ KB


In [51]:
# Column names, non-null counts and dtypes of the data table as loaded from CSV.
df_full_valid.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 64604 entries, 0 to 64603
Data columns (total 13 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   GOES_ID       64604 non-null  object 
 1   date          64604 non-null  object 
 2   observatory   64604 non-null  object 
 3   xrsa          64604 non-null  float64
 4   xrsb          64604 non-null  float64
 5   xrsa_corr     64604 non-null  float64
 6   xrsb_corr     64604 non-null  float64
 7   T_cor         64604 non-null  float64
 8   EM_cor        64604 non-null  float64
 9   T_phot        64604 non-null  float64
 10  EM_phot       64604 non-null  float64
 11  EM_cor_norm   64604 non-null  float64
 12  EM_phot_norm  64604 non-null  float64
dtypes: float64(10), object(3)
memory usage: 6.4+ MB


## Elegir datos GOES que son FAI

In [29]:
# --- Parameters of the FAI selection ---
window_minutes = 30
fai_temp_range = (7, 14)
fai_em_threshold = 0.005
date_column = "date"
duration = True
FAI_duration = 3
filter_flare_coincidence = True
# Which subset is used downstream:
#   "all"      = every point meeting the EM and T criteria
#   "true"     = "all" plus the minimum-duration criterion
#   "filtered" = "true" minus the points between the peak and the end of a flare
method = "true"

# The configured date_column is passed through (the call used to hard-code
# "date", silently ignoring the variable above).
result = fai_from_df(df_full=df_full_valid,
                     fai_temp_range=fai_temp_range,
                     fai_em_threshold=fai_em_threshold,
                     date_column=date_column,
                     duration=duration,
                     FAI_duration=FAI_duration,
                     filter_flare_coincidence=filter_flare_coincidence,
                     df_flares=df_flares_valid,
                     flare_peak_col="PeakTime",
                     flare_end_col="EndTime",
                     verbose=True)

# Unpack the three result tables.
df_fai_all = result["df_fai_all"]            # all candidates
df_fai_true = result["df_fai_true"]          # candidates with minimum duration
df_fai_filtered = result["df_fai_filtered"]  # true FAIs not coinciding with flares

method_mapping = {
    "all": df_fai_all,
    "true": df_fai_true,
    "filtered": df_fai_filtered
}

if method not in method_mapping:
    raise ValueError(f"M√©todo '{method}' no reconocido. Use 'all', 'true' o 'filtered'")
df_fai_selected_calculate = method_mapping[method]

‚úÖ Se encontraron 34071 puntos candidatos a FAI (T_cor (7, 14), EM_cor_norm > 0.005)
‚úÖ Se encontraron 19308 FAI verdaderos con duraci√≥n m√≠nima de 3 minutos consecutivos.
‚úÖ Se filtraron 2000 FAI que coincid√≠an con flares, quedan 17308 FAI filtrados.



In [32]:
# Preview the first rows of the candidate FAI table.
df_fai_all.head()

Unnamed: 0,GOES_ID,date,observatory,xrsa,xrsb,xrsa_corr,xrsb_corr,T_cor,EM_cor,T_phot,EM_phot,EM_cor_norm,EM_phot_norm,FAI_alert,FAI_true,delta_min,group_id,duration_from_start
8,GOES_8,2000-06-06 00:15:00,GOES-16,6.398151e-08,1e-06,5.200526e-08,3.744052e-07,13.319638,2.088278e+47,12.506248,4.879989e+47,0.020883,0.0488,True,False,1.0,3,0.0
9,GOES_9,2000-06-06 00:16:00,GOES-16,5.146598e-08,1e-06,3.738324e-08,3.462321e-07,12.010261,2.115799e+47,10.99046,5.388108e+47,0.021158,0.053881,True,False,1.0,3,1.0
10,GOES_10,2000-06-06 00:17:00,GOES-16,4.194724e-08,1e-06,2.585677e-08,2.906549e-07,11.058271,1.937464e+47,9.953444,5.260247e+47,0.019375,0.052602,True,True,1.0,3,2.0
11,GOES_11,2000-06-06 00:23:00,GOES-16,4.063336e-08,1e-06,4.950806e-09,1.062962e-07,8.298321,1.054738e+47,7.279352,3.402429e+47,0.010547,0.034024,True,False,6.0,4,0.0
12,GOES_12,2000-06-06 00:24:00,GOES-16,4.321096e-08,1e-06,8.710359e-09,1.461166e-07,9.254645,1.224493e+47,8.167912,3.721751e+47,0.012245,0.037218,True,False,1.0,4,1.0


In [35]:
# Count the values of the FAI_alert flag in the candidate table.
df_fai_all["FAI_alert"].value_counts()


FAI_alert
True    34071
Name: count, dtype: int64

In [36]:
# Count how many candidate rows are (and are not) flagged FAI_true.
df_fai_all["FAI_true"].value_counts()


FAI_true
True     19308
False    14763
Name: count, dtype: int64

In [37]:
# Sanity check: count the FAI_true values in the minimum-duration table.
df_fai_true["FAI_true"].value_counts()


FAI_true
True    19308
Name: count, dtype: int64

In [38]:
# Sanity check: count the FAI_true values in the flare-filtered table.
df_fai_filtered["FAI_true"].value_counts()


FAI_true
True    17308
Name: count, dtype: int64

### Carpeta para guardar el FAI

In [53]:
# Sub-folder (inside output_dir) where the FAI analysis results are written
analysis_dir = os.path.join(output_dir, "Analysis_FAI")

# Create the analysis folder if needed and report which case applied
if os.path.exists(analysis_dir):
    print(f"‚ö†Ô∏è La carpeta de an√°lisis ya exist√≠a: {analysis_dir}")
else:
    os.makedirs(analysis_dir, exist_ok=True)
    # Report the folder that was actually created (analysis_dir, not its parent)
    print(f"üìÅ Carpeta de an√°lisis creada: {analysis_dir}")

# Common file-name suffix encoding the detection parameters, built once so the
# three output files cannot drift apart.
fai_file_suffix = (
    f"_T{fai_temp_range[0]}-{fai_temp_range[1]}"
    f"_EM{fai_em_threshold}_dur{FAI_duration}min.csv"
)

# Output paths
file_all = os.path.join(analysis_dir, f"df_fai_all{fai_file_suffix}")
file_true = os.path.join(analysis_dir, f"df_fai_true{fai_file_suffix}")
file_filtered = os.path.join(analysis_dir, f"df_fai_filtered{fai_file_suffix}")

# Write the CSV files (row index is not saved)
df_fai_all.to_csv(file_all, index=False)
df_fai_true.to_csv(file_true, index=False)
df_fai_filtered.to_csv(file_filtered, index=False)

# Show the full paths
print(f"‚úÖ Todos los FAI candidatos guardados correctamente en:\n{file_all}")
print(f"‚úÖ Todos los FAI verdaderos guardados correctamente en:\n{file_true}")
print(f"‚úÖ Todos los FAI filtrados guardados correctamente en:\n{file_filtered}")

‚ö†Ô∏è La carpeta de an√°lisis ya exist√≠a: 2025-11-04_Analysis_for_185_days/Analysis_FAI
‚úÖ Todos los FAI candidatos guardados correctamente en:
2025-11-04_Analysis_for_185_days/Analysis_FAI/df_fai_all_T7-14_EM0.005_dur3min.csv
‚úÖ Todos los FAI verdaderos guardados correctamente en:
2025-11-04_Analysis_for_185_days/Analysis_FAI/df_fai_true_T7-14_EM0.005_dur3min.csv
‚úÖ Todos los FAI filtrados guardados correctamente en:
2025-11-04_Analysis_for_185_days/Analysis_FAI/df_fai_filtered_T7-14_EM0.005_dur3min.csv


In [54]:
# Column / dtype summary of the candidate table.
# NOTE(review): the saved output of this cell lists 13 columns, while the saved
# preview of df_fai_all earlier shows extra columns (FAI_alert, FAI_true, ...);
# the outputs look like they come from different runs — confirm with
# Restart Kernel -> Run All.
df_fai_all.info()

<class 'pandas.core.frame.DataFrame'>
Index: 34071 entries, 8 to 64597
Data columns (total 13 columns):
 #   Column        Non-Null Count  Dtype         
---  ------        --------------  -----         
 0   GOES_ID       34071 non-null  object        
 1   date          34071 non-null  datetime64[ns]
 2   observatory   34071 non-null  object        
 3   xrsa          34071 non-null  float64       
 4   xrsb          34071 non-null  float64       
 5   xrsa_corr     34071 non-null  float64       
 6   xrsb_corr     34071 non-null  float64       
 7   T_cor         34071 non-null  float64       
 8   EM_cor        34071 non-null  float64       
 9   T_phot        34071 non-null  float64       
 10  EM_phot       34071 non-null  float64       
 11  EM_cor_norm   34071 non-null  float64       
 12  EM_phot_norm  34071 non-null  float64       
dtypes: datetime64[ns](1), float64(10), object(2)
memory usage: 3.6+ MB


In [55]:
# Column / dtype summary of the minimum-duration table.
# NOTE(review): the saved output of this cell reports 20598 rows, whereas the
# saved output of the detection cell reports 19308 FAIs — presumably stale
# output from another run; confirm with Restart Kernel -> Run All.
df_fai_true.info()

<class 'pandas.core.frame.DataFrame'>
Index: 20598 entries, 10 to 64597
Data columns (total 13 columns):
 #   Column        Non-Null Count  Dtype         
---  ------        --------------  -----         
 0   GOES_ID       20598 non-null  object        
 1   date          20598 non-null  datetime64[ns]
 2   observatory   20598 non-null  object        
 3   xrsa          20598 non-null  float64       
 4   xrsb          20598 non-null  float64       
 5   xrsa_corr     20598 non-null  float64       
 6   xrsb_corr     20598 non-null  float64       
 7   T_cor         20598 non-null  float64       
 8   EM_cor        20598 non-null  float64       
 9   T_phot        20598 non-null  float64       
 10  EM_phot       20598 non-null  float64       
 11  EM_cor_norm   20598 non-null  float64       
 12  EM_phot_norm  20598 non-null  float64       
dtypes: datetime64[ns](1), float64(10), object(2)
memory usage: 2.2+ MB


## Cálculo de tiempos de anticipación

In [56]:

# Per-flare anticipation analysis for the selected FAI table.
df_anticipation_time = anticipation_fai_analysis_v2(
    df_fai_selected=df_fai_selected_calculate,
    df_flare_data=df_flares_valid,
    start_col="StartTime",
    peak_col="PeakTime",
    end_col="EndTime",
    # time span, counted back from the peak, in which FAIs are searched
    window_minutes=window_minutes,
    # (3 h) time span, counted back from the peak, in which previous flares are searched
    max_prev_flare_minutes=180,
)

# Output file: the name encodes the method, the window and the detection parameters
anticipation_csv_name = (
    f"df_anticipation_time_{method}_(W_{window_minutes})"
    f"_T{fai_temp_range[0]}-{fai_temp_range[1]}"
    f"_EM{fai_em_threshold}_dur{FAI_duration}min.csv"
)
file_anticipation_time = os.path.join(analysis_dir, anticipation_csv_name)

# Save the anticipation DataFrame (row index is not written)
df_anticipation_time.to_csv(file_anticipation_time, index=False)

print(f"‚úÖ df_anticipation_time guardado correctamente en:\n{file_anticipation_time}")


üìä RESUMEN DE AN√ÅLISIS FAI‚ÄìFLARE
üîπ Total de FAIs analizados: 20598
üîπ Total de flares analizados: 2020
üîπ FAIs encontrados dentro de ventanas: 9918 (48.2%)
üîπ Promedio de FAIs por flare: 4.91
------------------------------------------------------
üìà ESTAD√çSTICAS DE ASOCIACI√ìN:
   ‚Ä¢ Flares con ‚â•1 FAI (cualquier ventana): 1562 (77.3%)
   ‚Ä¢ Flares SIN FAI (ninguna ventana): 458 (22.7%)
   ‚Ä¢ Flares con FAI ANTES del inicio: 683 (33.8%)
   ‚Ä¢ Flares SIN FAI antes del inicio: 1337 (66.2%)
   ‚Ä¢ Flares con FAI ANTES del pico: 1358 (67.2%)
   ‚Ä¢ Flares SIN FAI antes del pico: 662 (32.8%)
------------------------------------------------------
üìç DISTRIBUCI√ìN DE FAIs POR VENTANA:
   ‚Ä¢ Antes del inicio (WStart): 2650 (12.9%)
   ‚Ä¢ Antes del pico (WPeak):    7881 (38.3%)
   ‚Ä¢ Entre inicio(inclusive) y pico:          5231 (25.4%)
   ‚Ä¢ Entre pico(inclusive) y fin(inclusive):  2037 (9.9%)
   ‚Ä¢ Entre inicio(inclusive) y fin(inclusive):7268 (35.3%)
------------

In [57]:
# Two flares with the largest FAIalerts_W values.
df_anticipation_time.sort_values(by="FAIalerts_W", ascending=False).head(2)

Unnamed: 0,Flare_ID,StartTime,PeakTime,EndTime,Class,ClassLetter,ClassNumber,ClassGroup,Observatory,StartPeak,...,FAIalerts_StartPeak,FAIalerts_PeakEnd,FAIalerts_startEnd,AnticipationStart,AnticipationPeak,Peak_to_lastFAI,Time_since_prev_flare_end,Time_since_prev_flare_peak,RelAnticipation_Peak,RelAnticipation_Start
1025,Flare_20141026_1025,2014-10-26 17:08:00,2014-10-26 17:17:00,2014-10-26 17:30:00,M1.0,M,1.0,M1-4.9,GOES,9.0,...,9,2,11,19.0,28.0,-1.0,55.0,60.0,3.111111,2.111111
1369,Flare_20230805_1369,2023-08-05 06:16:00,2023-08-05 07:18:00,2023-08-05 09:06:00,M1.6,M,1.6,M1-4.9,GOES,62.0,...,28,2,30,,30.0,-55.0,102.0,109.0,0.483871,


## Flares ordenados por tiempo desde el fin del flare previo

In [58]:
# Four flares with the smallest Time_since_prev_flare_end values.
df_anticipation_time.sort_values(by="Time_since_prev_flare_end", ascending=True).head(4)

Unnamed: 0,Flare_ID,StartTime,PeakTime,EndTime,Class,ClassLetter,ClassNumber,ClassGroup,Observatory,StartPeak,...,FAIalerts_StartPeak,FAIalerts_PeakEnd,FAIalerts_startEnd,AnticipationStart,AnticipationPeak,Peak_to_lastFAI,Time_since_prev_flare_end,Time_since_prev_flare_peak,RelAnticipation_Peak,RelAnticipation_Start
1473,Flare_20240323_1473,2024-03-23 22:56:00,2024-03-23 22:59:00,2024-03-23 23:03:00,C7.9,C,7.9,C5-9.9,GOES,3.0,...,0,0,0,,,,3.0,9.0,,
1719,Flare_20240729_1719,2024-07-29 01:36:00,2024-07-29 01:39:00,2024-07-29 01:43:00,C9.2,C,9.2,C5-9.9,GOES,3.0,...,0,0,0,,,,3.0,9.0,,
1629,Flare_20240515_1629,2024-05-15 08:13:00,2024-05-15 08:16:00,2024-05-15 08:20:00,C9.9,C,9.9,C5-9.9,GOES,3.0,...,0,0,0,,,,3.0,34.0,,
1659,Flare_20240531_1659,2024-05-31 06:34:00,2024-05-31 06:37:00,2024-05-31 06:39:00,C2.2,C,2.2,C1-4.9,GOES,3.0,...,0,1,1,,,-2.0,3.0,16.0,,


## FAIs asociados a flares

In [59]:
# Associate each FAI with a flare (or mark it as not associated).
# With include_inside=False only flares after the FAI are searched.
df_fai_assoc = associate_fai_to_flare_dataframes(
    df_fai_selected=df_fai_selected_calculate,
    df_flares=df_flares_valid,
    window_minutes=window_minutes,
    include_inside=False,
)

# Output file: the name encodes the method, the window and the detection parameters
fai_assoc_csv_name = (
    f"df_fai_assoc_{method}_(W_{window_minutes})"
    f"_T{fai_temp_range[0]}-{fai_temp_range[1]}"
    f"_EM{fai_em_threshold}_dur{FAI_duration}min.csv"
)
file_fai_assoc = os.path.join(analysis_dir, fai_assoc_csv_name)

# Save the association DataFrame (row index is not written)
df_fai_assoc.to_csv(file_fai_assoc, index=False)

print(f"‚úÖ df_fai_assoc guardado correctamente en:\n{file_fai_assoc}")


Usando columna de tiempo FAI: date
Procesando 20598 alertas FAI...

--- Estad√≠sticas de Asociaci√≥n FAI-Flare ---
Total FAIs: 20598
FAIs asociados: 9882 (48.0%)
Ventana: 30 minutos hacia adelante
Incluir FAIs dentro de flares activos: False
Tiempo medio a flare: 10.5 min
Tiempo m√≠nimo: 0.0 min
Tiempo m√°ximo: 30.0 min

Distribuci√≥n por clase:
  M1.0: 367
  M1.3: 208
  M1.2: 166
  C2.4: 162
  C3.4: 160
  M1.4: 157
  C3.0: 153
  M1.5: 151
  C2.9: 148
  C5.5: 144
  C5.1: 143
  C2.3: 143
  C2.6: 142
  C3.9: 139
  C1.9: 138
  M1.1: 128
  M1.6: 126
  C1.7: 125
  C2.7: 124
  C3.7: 122
  C3.3: 120
  C3.2: 117
  C1.8: 117
  X1.1: 113
  C2.1: 109
  C2.8: 108
  C4.6: 104
  C4.0: 101
  C3.1: 101
  C5.7: 100
  C3.6: 98
  X1.0: 98
  C5.0: 98
  C1.6: 97
  C2.5: 93
  C2.0: 93
  C5.9: 91
  C4.1: 88
  C8.9: 85
  C2.2: 84
  C4.3: 82
  X1.2: 81
  C4.4: 80
  C1.4: 79
  C7.5: 78
  C4.5: 77
  C5.4: 76
  M1.7: 75
  C7.0: 75
  C7.3: 74
  C4.2: 74
  C3.5: 73
  C5.6: 73
  C3.8: 72
  C6.8: 67
  C7.7: 65
  C6.1

In [60]:
# Same association as before, but include_inside=True also keeps FAIs that
# fall inside an active flare.
df_fai_assoc2 = associate_fai_to_flare_dataframes(
    df_fai_selected=df_fai_selected_calculate,
    df_flares=df_flares_valid,
    window_minutes=window_minutes,
    include_inside=True,
)

# Output file: the name encodes the method, the window and the detection parameters
file_fai_assoc2 = os.path.join(
    analysis_dir,
    f"df_fai_assoc2_{method}_(W_{window_minutes})_T{fai_temp_range[0]}-{fai_temp_range[1]}_EM{fai_em_threshold}_dur{FAI_duration}min.csv",
)

# Save the association DataFrame (row index is not written)
df_fai_assoc2.to_csv(file_fai_assoc2, index=False)

# Name the table that was actually saved (the original message said df_fai_assoc)
print(f"‚úÖ df_fai_assoc2 guardado correctamente en:\n{file_fai_assoc2}")


Usando columna de tiempo FAI: date
Procesando 20598 alertas FAI...

--- Estad√≠sticas de Asociaci√≥n FAI-Flare ---
Total FAIs: 20598
FAIs asociados: 10381 (50.4%)
Ventana: 30 minutos hacia adelante
Incluir FAIs dentro de flares activos: True
Tiempo medio a flare: 10.0 min
Tiempo m√≠nimo: 0.0 min
Tiempo m√°ximo: 30.0 min

Distribuci√≥n por clase:
  M1.0: 436
  M1.3: 232
  X1.1: 172
  M1.2: 169
  C3.6: 165
  C2.4: 162
  M1.6: 161
  C3.4: 160
  C3.0: 159
  M1.4: 157
  M1.5: 151
  C2.9: 148
  C5.5: 144
  C2.3: 143
  C5.1: 143
  C2.6: 142
  C3.9: 140
  C1.9: 138
  C3.7: 132
  M1.1: 132
  C1.7: 126
  C2.1: 126
  C3.3: 126
  C2.7: 124
  C3.1: 122
  C1.8: 117
  C3.2: 117
  C2.8: 108
  X1.0: 106
  C4.6: 104
  C4.0: 101
  C5.7: 100
  C5.0: 98
  C1.6: 97
  C4.1: 95
  C2.5: 93
  C2.0: 93
  C5.9: 91
  C8.9: 85
  C2.2: 84
  C4.3: 82
  X1.2: 81
  C4.4: 80
  C1.4: 79
  C7.5: 78
  M1.7: 77
  C4.5: 77
  C9.3: 77
  C5.4: 76
  C7.0: 76
  C7.3: 74
  C4.2: 74
  C5.6: 73
  C3.5: 73
  C3.8: 72
  C6.8: 67
  C6

In [61]:
# Preview the first rows of the FAI-to-flare association table.
df_fai_assoc.head()

Unnamed: 0,GOES_ID,date,observatory,xrsa,xrsb,xrsa_corr,xrsb_corr,T_cor,EM_cor,T_phot,...,F_ClassGroup,F_Observatory,F_StartPeak,F_PeakEnd,F_StartEnd,Association_Type,Time_to_flare,FAI_to_start,FAI_to_peak,FAI_to_end
10,GOES_10,2000-06-06 00:17:00,GOES-16,4.194724e-08,1e-06,2.585677e-08,2.906549e-07,11.058271,1.937464e+47,9.953444,...,C1-4.9,GOES,15.0,12.0,27.0,PeakTime,26.0,11.0,26.0,38.0
11,GOES_11,2000-06-06 00:23:00,GOES-16,4.063336e-08,1e-06,4.950806e-09,1.062962e-07,8.298321,1.054738e+47,7.279352,...,C1-4.9,GOES,15.0,12.0,27.0,PeakTime,20.0,5.0,20.0,32.0
12,GOES_12,2000-06-06 00:24:00,GOES-16,4.321096e-08,1e-06,8.710359e-09,1.461166e-07,9.254645,1.224493e+47,8.167912,...,C1-4.9,GOES,15.0,12.0,27.0,PeakTime,19.0,4.0,19.0,31.0
13,GOES_13,2000-06-06 00:25:00,GOES-16,4.753495e-08,1e-06,8.936986e-09,1.561961e-07,9.086959,1.344306e+47,8.010274,...,C1-4.9,GOES,15.0,12.0,27.0,PeakTime,18.0,3.0,18.0,30.0
14,GOES_14,2000-06-06 00:26:00,GOES-16,5.612609e-08,1e-06,1.455322e-08,2.01082e-07,10.094641,1.496619e+47,8.974755,...,C1-4.9,GOES,15.0,12.0,27.0,PeakTime,17.0,2.0,17.0,29.0


## Pruebas: criterio de duración mínima paso a paso

In [62]:
# Build a 10-row toy DataFrame (one row per minute) to walk through the
# minimum-duration logic step by step.
data = {
    # "min" is the supported minute alias; "T" is deprecated and emits a FutureWarning
    "date": pd.date_range("2025-10-29 00:00", periods=10, freq="min"),
    "T_cor": [8, 9, 9, 7, 10, 11, 12, 6, 8, 9],
    "EM_cor_norm": [0.01, 0.02, 0.01, 0.001, 0.01, 0.02, 0.015, 0.004, 0.006, 0.007],
}

df = pd.DataFrame(data)

  "date": pd.date_range("2025-10-29 00:00", periods=10, freq="T"),


In [63]:
# Display the toy DataFrame.
df

Unnamed: 0,date,T_cor,EM_cor_norm
0,2025-10-29 00:00:00,8,0.01
1,2025-10-29 00:01:00,9,0.02
2,2025-10-29 00:02:00,9,0.01
3,2025-10-29 00:03:00,7,0.001
4,2025-10-29 00:04:00,10,0.01
5,2025-10-29 00:05:00,11,0.02
6,2025-10-29 00:06:00,12,0.015
7,2025-10-29 00:07:00,6,0.004
8,2025-10-29 00:08:00,8,0.006
9,2025-10-29 00:09:00,9,0.007


In [64]:
# Base FAI criterion for the toy example.
# Note: these assignments rebind the notebook-level fai_temp_range and
# fai_em_threshold that the detection and export cells also read.
fai_temp_range = (7, 14)
fai_em_threshold = 0.005

In [65]:
# Base FAI mask: T_cor inside the (inclusive) range AND EM_cor_norm above the threshold.
mask = (
    df["T_cor"].between(fai_temp_range[0], fai_temp_range[1])
    & df["EM_cor_norm"].gt(fai_em_threshold)
)
print("‚úÖ Mask base FAI:\n", mask)

‚úÖ Mask base FAI:
 0     True
1     True
2     True
3    False
4     True
5     True
6     True
7    False
8     True
9     True
dtype: bool


In [66]:
# STEP 1: label runs of consecutive equal values.
# A new label starts wherever the mask differs from the previous row.
mask_shift = (mask != mask.shift()).cumsum()
print("\nüìå Etiquetas de bloque (mask_shift):\n", mask_shift)


üìå Etiquetas de bloque (mask_shift):
 0    1
1    1
2    1
3    2
4    3
5    3
6    3
7    4
8    5
9    5
dtype: int64


In [67]:
# STEP 2: size of each block. Summing the boolean mask within each run label
# counts its True rows; transform broadcasts that count back to every row,
# so rows belonging to a False run get 0.
block_sizes = mask.groupby(mask_shift).transform("sum")
print("\nüìå Tama√±o de cada bloque de True (block_sizes):\n", block_sizes)


üìå Tama√±o de cada bloque de True (block_sizes):
 0    3
1    3
2    3
3    0
4    3
5    3
6    3
7    0
8    2
9    2
dtype: int64


In [68]:
# STEP 3: keep only the True rows whose run is at least FAI_duration long.
FAI_duration = 3
mask_filtered = mask & block_sizes.ge(FAI_duration)
print("\nüìå Mask final filtrada por duraci√≥n >= 3:\n", mask_filtered)


üìå Mask final filtrada por duraci√≥n >= 3:
 0     True
1     True
2     True
3    False
4     True
5     True
6     True
7    False
8    False
9    False
dtype: bool


In [69]:
# Final toy DataFrame after the duration filter.
# Stored under its own name so this demo does not overwrite df_fai_filtered,
# the real analysis result that the CSV-export cell writes to disk.
df_demo_filtered = df.loc[mask_filtered]
print("\nüìå DataFrame final con criterio de duraci√≥n:\n", df_demo_filtered)


üìå DataFrame final con criterio de duraci√≥n:
                  date  T_cor  EM_cor_norm
0 2025-10-29 00:00:00      8        0.010
1 2025-10-29 00:01:00      9        0.020
2 2025-10-29 00:02:00      9        0.010
4 2025-10-29 00:04:00     10        0.010
5 2025-10-29 00:05:00     11        0.020
6 2025-10-29 00:06:00     12        0.015


## Otra prueba: duración mínima con ventana móvil (rolling)


In [70]:
# Alternative duration check with a rolling window: a row is flagged only when
# it and the previous FAI_duration - 1 rows are all True.
mask = pd.Series([False, True, True, True, False, True, True, True, True])

FAI_duration = 3
# Rows without a full window yield NaN sums, which compare as False.
window_true_count = mask.rolling(window=FAI_duration, min_periods=FAI_duration).sum()
mask_true_alert = window_true_count.eq(FAI_duration)

print(mask, mask_true_alert, sep="\n")


0    False
1     True
2     True
3     True
4    False
5     True
6     True
7     True
8     True
dtype: bool
0    False
1    False
2    False
3     True
4    False
5    False
6    False
7     True
8     True
dtype: bool
