# Cálculo FAI


## Packages

In [2]:
from datetime import datetime, timedelta
import pandas as pd
import os
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.ticker import FuncFormatter
from mpl_toolkits.mplot3d import Axes3D


## Funciones de FAI

### Función que llama las funciones para calcular FAI

In [3]:

def fai_from_df(df_full: pd.DataFrame,
                fai_temp_range: tuple = (7, 14),
                fai_em_threshold: float = 0.005,
                date_column: str = "date",
                duration: bool = True,
                FAI_duration: int = 3,
                filter_flare_coincidence: bool = True,
                df_flares: pd.DataFrame = None,
                flare_peak_col: str = "peak_time",
                flare_end_col: str = "end_time",
                verbose: bool = True,
            ) -> dict:
    """
    Flag FAI candidates in a DataFrame and derive the "true" FAI subset.

    A row is a candidate (``FAI_alert``) when ``T_cor`` lies inside
    ``fai_temp_range`` (both ends included) and ``EM_cor_norm`` is strictly
    greater than ``fai_em_threshold``.  When the duration criterion is active,
    candidates are grouped into runs; a new run starts whenever ``FAI_alert``
    flips or the gap to the previous row exceeds one minute, and a candidate
    becomes ``FAI_true`` once it is at least ``FAI_duration - 1`` minutes past
    the first row of its run.  When the duration criterion is disabled, every
    candidate counts as a true FAI.

    Parameters
    ----------
    df_full : pd.DataFrame
        Must contain 'T_cor', 'EM_cor_norm' and the column named by
        ``date_column``.  Rows are processed in the given order (no sorting is
        done here) -- presumably already chronological; verify at the caller.
    fai_temp_range : tuple, optional
        (low, high) coronal temperature bounds, in MK. Default (7, 14).
    fai_em_threshold : float, optional
        Lower (exclusive) bound on 'EM_cor_norm'. Default 0.005.
    date_column : str, optional
        Name of the timestamp column. Default "date".
    duration : bool, optional
        Apply the minimum-duration criterion. Default True.
    FAI_duration : int, optional
        Minimum run length; only used when ``duration`` is True and the value
        is greater than 1. Default 3.
    filter_flare_coincidence : bool, optional
        Drop true FAIs whose timestamp falls inside [peak, end] of any flare.
        Default True.
    df_flares : pd.DataFrame, optional
        Flare table; required when ``filter_flare_coincidence`` is True.
    flare_peak_col : str, optional
        Flare peak-time column in ``df_flares``. Default "peak_time".
    flare_end_col : str, optional
        Flare end-time column in ``df_flares``. Default "end_time".
    verbose : bool, optional
        Print a short summary of the counts found. Default True.

    Returns
    -------
    dict
        {
            "df_fai_full": copy of the input with the evaluation columns added,
            "df_fai_all": rows where FAI_alert is True,
            "df_fai_true": rows where FAI_true is True,
            "df_fai_filtered": df_fai_true minus rows inside a flare's
                               peak-to-end interval (equal to df_fai_true when
                               the flare filter is off)
        }

    Raises
    ------
    ValueError
        If a required column is missing, or if the flare filter is requested
        without ``df_flares``.
    """

    # Fail early when an input column is missing.
    for col in ("T_cor", "EM_cor_norm", date_column):
        if col not in df_full.columns:
            raise ValueError(f"Falta la columna requerida: '{col}'")

    # The flare filter needs a flare table with both time columns.
    if filter_flare_coincidence:
        if df_flares is None:
            raise ValueError("Cuando filter_flare_coincidence=True, debe proporcionar df_flares")
        for col in (flare_peak_col, flare_end_col):
            if col not in df_flares.columns:
                raise ValueError(f"Falta la columna requerida en df_flares: '{col}'")

    # Work on copies so the caller's frames are never modified; unparseable
    # timestamps become NaT instead of raising.
    df_full = df_full.copy()
    df_full[date_column] = pd.to_datetime(df_full[date_column], errors="coerce")

    if filter_flare_coincidence:
        df_flares = df_flares.copy()
        df_flares[flare_peak_col] = pd.to_datetime(df_flares[flare_peak_col], errors="coerce")
        df_flares[flare_end_col] = pd.to_datetime(df_flares[flare_end_col], errors="coerce")

    # Step 1: candidate flag from the temperature and emission-measure criteria.
    df_full["FAI_alert"] = (df_full["T_cor"].between(*fai_temp_range) &
                            (df_full["EM_cor_norm"] > fai_em_threshold))

    duration_active = duration and FAI_duration > 1

    if duration_active:
        # Step 2: minutes elapsed since the previous row (first row counts as 1).
        df_full["delta_min"] = df_full[date_column].diff().dt.total_seconds().div(60).fillna(1)

        # Step 3: a new group starts when the alert state flips or after a gap
        # longer than one minute.
        df_full["group_id"] = ((df_full["FAI_alert"] != df_full["FAI_alert"].shift()) |
                               (df_full["delta_min"] > 1)).cumsum()

        # Step 4: minutes elapsed since the first row of each group.
        df_full["duration_from_start"] = df_full.groupby("group_id")[date_column] \
                                             .transform(lambda x: (x - x.iloc[0]).dt.total_seconds()/60)

        # Step 5: the alert must have been active long enough.
        df_full["FAI_true"] = df_full["FAI_alert"] & (df_full["duration_from_start"] >= (FAI_duration - 1))
    else:
        # Without a duration criterion every candidate is a true FAI.  Leaving
        # the column all-False here would return empty "true" and "filtered"
        # frames whenever the optional criterion is switched off.
        df_full["FAI_true"] = df_full["FAI_alert"]

    df_fai_all = df_full[df_full["FAI_alert"]].copy()
    df_fai_true = df_full[df_full["FAI_true"]].copy()

    # Step 6: drop true FAIs that fall between the peak and end of any flare.
    df_fai_filtered = df_fai_true.copy()
    if filter_flare_coincidence and len(df_fai_true) > 0:
        keep = pd.Series(True, index=df_fai_true.index)
        fai_dates = df_fai_true[date_column]

        for peak_time, end_time in zip(df_flares[flare_peak_col], df_flares[flare_end_col]):
            # Flares with an unparseable peak or end time cannot define an interval.
            if pd.isna(peak_time) or pd.isna(end_time):
                continue
            keep &= ~fai_dates.between(peak_time, end_time)

        df_fai_filtered = df_fai_true.loc[keep].copy()

    if verbose:
        print(f"‚úÖ Se encontraron {len(df_fai_all)} puntos candidatos a FAI (T_cor {fai_temp_range}, EM_cor_norm > {fai_em_threshold})")
        if duration_active:
            print(f"‚úÖ Se encontraron {len(df_fai_true)} FAI verdaderos con duraci√≥n m√≠nima de {FAI_duration} minutos consecutivos.")
        if filter_flare_coincidence:
            filtered_count = len(df_fai_true) - len(df_fai_filtered)
            print(f"‚úÖ Se filtraron {filtered_count} FAI encontrados entre el PeaK-End de flares, quedan {len(df_fai_filtered)} FAI filtrados.\n")

    return {
        "df_fai_full": df_full,
        "df_fai_all": df_fai_all,
        "df_fai_true": df_fai_true,
        "df_fai_filtered": df_fai_filtered
    }


### Función para calcular la anticipación del FAI

In [4]:
######################################
# 8. Tiempo de Anticipacion de flare #
######################################

def anticipation_fai_analysis_v2(df_fai_selected, df_flare_data,
                                start_col="StartTime",
                                peak_col="PeakTime",
                                end_col="EndTime",
                                window_minutes=30, # minutes before the flare start
                                max_prev_flare_minutes=180, # cap for the time since the previous flare
                                fai_date_col="date"): # timestamp column of df_fai_selected
    """
    Count FAIs around every flare and derive anticipation metrics.

    Flares are sorted by ``peak_col``.  For each flare the search window runs
    from ``max(end of the previous flare, start - window_minutes)`` up to the
    flare's end time (both ends included).  A summary is printed and an
    annotated copy of the flare table is returned.

    Parameters
    ----------
    df_fai_selected : pd.DataFrame
        FAI rows; timestamps are read from the ``fai_date_col`` column.
    df_flare_data : pd.DataFrame
        Flare table with the ``start_col``, ``peak_col`` and ``end_col``
        columns.  If it also has a 'StartPeak' column, the relative
        anticipation columns are computed by dividing by it.
    start_col, peak_col, end_col : str
        Names of the flare start / peak / end time columns.
    window_minutes : int
        Minutes before the flare start in which FAIs are searched.
    max_prev_flare_minutes : int
        Time since the previous flare is only reported when it does not
        exceed this many minutes; otherwise it is NaN.
    fai_date_col : str
        Name of the timestamp column in ``df_fai_selected``. Default "date".

    Returns
    -------
    pd.DataFrame
        Sorted copy of ``df_flare_data`` with the added columns
        FAIalerts_W, FAIalerts_WStart, FAIalerts_WPeak, FAIalerts_StartPeak,
        FAIalerts_PeakEnd, FAIalerts_startEnd, AnticipationStart,
        AnticipationPeak, Peak_to_lastFAI, Time_since_prev_flare_end,
        Time_since_prev_flare_peak and, when 'StartPeak' is present,
        RelAnticipation_Peak and RelAnticipation_Start.  Times are in minutes;
        missing values are NaN so the metric columns are always float.
    """

    df = df_flare_data.copy()
    for time_col in (start_col, peak_col, end_col):
        df[time_col] = pd.to_datetime(df[time_col])
    fai_times = pd.to_datetime(df_fai_selected[fai_date_col]).sort_values()

    # Chronological order with a 0..n-1 index so that `i - 1` addresses the
    # previous flare.
    df = df.sort_values(peak_col).reset_index(drop=True)

    window = timedelta(minutes=window_minutes)

    results = {
        "FAIalerts_W": [],
        "FAIalerts_WStart": [],
        "FAIalerts_WPeak": [],
        "FAIalerts_StartPeak": [],
        "FAIalerts_PeakEnd": [],
        "FAIalerts_startEnd": [],
        "AnticipationStart": [],
        "AnticipationPeak": [],
        "Peak_to_lastFAI": [],
        "Time_since_prev_flare_end": [],
        "Time_since_prev_flare_peak": []
    }

    for i, row in df.iterrows():
        start_t = row[start_col]
        peak_t = row[peak_col]
        end_t = row[end_col]

        has_previous = i > 0
        # The first flare has no predecessor, so only the window bounds it.
        prev_end = df.loc[i - 1, end_col] if has_previous else pd.Timestamp.min

        # Lower limit: end of the previous flare or `window_minutes` before
        # this flare's start, whichever is later.
        lower_limit = max(prev_end, start_t - window)
        upper_limit = end_t

        # FAIs inside the overall window of this flare.
        valid_fais = fai_times[(fai_times >= lower_limit) & (fai_times <= upper_limit)]

        # Counts per sub-window.
        results["FAIalerts_W"].append(len(valid_fais))
        results["FAIalerts_WStart"].append(len(valid_fais[valid_fais < start_t]))
        results["FAIalerts_WPeak"].append(len(valid_fais[valid_fais < peak_t]))
        results["FAIalerts_StartPeak"].append(
            len(valid_fais[(valid_fais >= start_t) & (valid_fais < peak_t)]))
        results["FAIalerts_PeakEnd"].append(
            len(valid_fais[(valid_fais >= peak_t) & (valid_fais <= end_t)]))
        results["FAIalerts_startEnd"].append(
            len(valid_fais[(valid_fais >= start_t) & (valid_fais <= end_t)]))

        # Anticipation metrics; NaN (not None) keeps the columns numeric even
        # when no flare has a value.
        anticipation_start = anticipation_peak = peak_to_last_fai = np.nan
        if not valid_fais.empty:
            earliest_fai = valid_fais.min()
            last_fai = valid_fais.max()

            if earliest_fai < start_t:
                anticipation_start = (start_t - earliest_fai).total_seconds() / 60.0
            if earliest_fai < peak_t:
                anticipation_peak = (peak_t - earliest_fai).total_seconds() / 60.0
            # Negative when the last FAI occurs after the peak.
            peak_to_last_fai = (peak_t - last_fai).total_seconds() / 60.0

        results["AnticipationStart"].append(anticipation_start)
        results["AnticipationPeak"].append(anticipation_peak)
        results["Peak_to_lastFAI"].append(peak_to_last_fai)

        # Minutes from the previous flare's end / peak to this flare's peak,
        # reported only when within `max_prev_flare_minutes`.
        time_since_prev_end = time_since_prev_peak = np.nan
        if has_previous:
            delta_end = (peak_t - prev_end).total_seconds() / 60.0
            delta_peak = (peak_t - df.loc[i - 1, peak_col]).total_seconds() / 60.0

            if delta_end <= max_prev_flare_minutes:
                time_since_prev_end = delta_end
            if delta_peak <= max_prev_flare_minutes:
                time_since_prev_peak = delta_peak

        results["Time_since_prev_flare_end"].append(time_since_prev_end)
        results["Time_since_prev_flare_peak"].append(time_since_prev_peak)

    # Attach the new columns to the flare table.
    for col, values in results.items():
        df[col] = values

    # ======================================================
    # Global statistics
    # ======================================================
    total_fais = len(df_fai_selected)
    total_flares = len(df)
    total_fais_in_windows = df["FAIalerts_W"].sum()

    def pct_of_flares(count):
        # Share of all flares, guarded against an empty flare table.
        return 100 * count / total_flares if total_flares > 0 else 0

    def pct(x):
        # Share of all FAIs, guarded against an empty FAI table.
        return 100 * x / total_fais if total_fais > 0 else 0

    flares_with_fais = (df["FAIalerts_W"] > 0).sum()
    flares_without_fais = total_flares - flares_with_fais
    flares_with_fai_before_start = (df["FAIalerts_WStart"] > 0).sum()
    flares_without_fai_before_start = total_flares - flares_with_fai_before_start
    flares_with_fai_before_peak = (df["FAIalerts_WPeak"] > 0).sum()
    flares_without_fai_before_peak = total_flares - flares_with_fai_before_peak

    pct_flares_with_fais = pct_of_flares(flares_with_fais)
    pct_flares_without_fais = pct_of_flares(flares_without_fais)
    pct_flares_with_fai_before_start = pct_of_flares(flares_with_fai_before_start)
    pct_flares_without_fai_before_start = pct_of_flares(flares_without_fai_before_start)
    pct_flares_with_fai_before_peak = pct_of_flares(flares_with_fai_before_peak)
    pct_flares_without_fai_before_peak = pct_of_flares(flares_without_fai_before_peak)

    # Totals per sub-window.
    fai_WStart = df["FAIalerts_WStart"].sum()
    fai_WPeak = df["FAIalerts_WPeak"].sum()
    fai_StartPeak = df["FAIalerts_StartPeak"].sum()
    fai_PeakEnd = df["FAIalerts_PeakEnd"].sum()
    fai_StartEnd = df["FAIalerts_startEnd"].sum()

    mean_fais_per_flare = df["FAIalerts_W"].mean()
    anticipations = df["AnticipationPeak"].dropna()
    mean_anticipation = anticipations.mean() if not anticipations.empty else np.nan
    min_anticipation = anticipations.min() if not anticipations.empty else np.nan
    max_anticipation = anticipations.max() if not anticipations.empty else np.nan

    print("\n" + "="*70)
    print("üìä RESUMEN DE AN√ÅLISIS FAI‚ÄìFLARE")
    print("="*70)
    print(f"üîπ Total de FAIs analizados: {total_fais}")
    print(f"üîπ Total de flares analizados: {total_flares}")
    print(f"üîπ FAIs encontrados dentro de ventanas: {total_fais_in_windows} ({pct(total_fais_in_windows):.1f}%)")
    print(f"üîπ Promedio de FAIs por flare: {mean_fais_per_flare:.2f}")
    print("------------------------------------------------------")
    print("üìà ESTAD√çSTICAS DE ASOCIACI√ìN:")
    print(f"   ‚Ä¢ Flares con ‚â•1 FAI (cualquier ventana): {flares_with_fais} ({pct_flares_with_fais:.1f}%)")
    print(f"   ‚Ä¢ Flares SIN FAI (ninguna ventana): {flares_without_fais} ({pct_flares_without_fais:.1f}%)")
    print(f"   ‚Ä¢ Flares con FAI ANTES del inicio: {flares_with_fai_before_start} ({pct_flares_with_fai_before_start:.1f}%)")
    print(f"   ‚Ä¢ Flares SIN FAI antes del inicio: {flares_without_fai_before_start} ({pct_flares_without_fai_before_start:.1f}%)")
    print(f"   ‚Ä¢ Flares con FAI ANTES del pico: {flares_with_fai_before_peak} ({pct_flares_with_fai_before_peak:.1f}%)")
    print(f"   ‚Ä¢ Flares SIN FAI antes del pico: {flares_without_fai_before_peak} ({pct_flares_without_fai_before_peak:.1f}%)")
    print("------------------------------------------------------")
    print("üìç DISTRIBUCI√ìN DE FAIs POR VENTANA:")
    print(f"   ‚Ä¢ Antes del inicio (WStart): {fai_WStart} ({pct(fai_WStart):.1f}%)")
    print(f"   ‚Ä¢ Antes del pico (WPeak):    {fai_WPeak} ({pct(fai_WPeak):.1f}%)")
    print(f"   ‚Ä¢ Entre inicio(inclusive) y pico:          {fai_StartPeak} ({pct(fai_StartPeak):.1f}%)")
    print(f"   ‚Ä¢ Entre pico(inclusive) y fin(inclusive):  {fai_PeakEnd} ({pct(fai_PeakEnd):.1f}%)")
    print(f"   ‚Ä¢ Entre inicio(inclusive) y fin(inclusive):{fai_StartEnd} ({pct(fai_StartEnd):.1f}%)")
    print("------------------------------------------------------")
    if not anticipations.empty:
        print(f"üî∏ Anticipaci√≥n media (respecto al pico): {mean_anticipation:.1f} min")
        print(f"üî∏ Anticipaci√≥n m√≠nima: {min_anticipation:.1f} min")
        print(f"üî∏ Anticipaci√≥n m√°xima: {max_anticipation:.1f} min")
    else:
        print("‚ö†Ô∏è No se encontraron FAIs previos para calcular anticipaciones.")
    print("="*70 + "\n")

    # ======================================================
    # Relative times (anticipation divided by the 'StartPeak' column)
    # ======================================================
    if "StartPeak" in df.columns:
        df["RelAnticipation_Peak"] = df["AnticipationPeak"] / df["StartPeak"]
        df["RelAnticipation_Start"] = df["AnticipationStart"] / df["StartPeak"]
        print("‚úÖ Columnas 'RelAnticipation_Peak' y 'RelAnticipation_Start' a√±adidas (Œît / StartPeak).")
    else:
        print("‚ö†Ô∏è No se encontr√≥ la columna 'StartPeak'. No se calcularon tiempos relativos.")

    return df




### Función para buscar todos los FAI asociados a flares

In [5]:
#
def associate_fai_to_flare_dataframes(df_fai_selected, df_flares, 
                                      window_minutes=30, include_inside=True):
    """
    Associate every FAI with the first relevant flare and print a summary.

    For each FAI the flare is chosen by the first rule that matches:
    1. a flare whose StartTime lies in [FAI, FAI + window];
    2. otherwise a flare whose PeakTime lies in [FAI, FAI + window];
    3. otherwise a flare whose EndTime lies in [FAI, FAI + window];
    4. otherwise, if ``include_inside`` is True, a flare that is active at
       the FAI time (StartTime <= FAI <= EndTime).
    When a rule matches several flares, the one with the earliest value of the
    rule's time column is taken (earliest StartTime for rule 4).

    Parameters
    ----------
    df_fai_selected : pd.DataFrame
        FAI alerts; the timestamp is read from 'date' or, failing that,
        'Unnamed: 0'.
    df_flares : pd.DataFrame
        Flare table with StartTime, PeakTime, EndTime, Flare_ID, Class,
        ClassLetter, ClassNumber, ClassGroup, StartPeak, PeakEnd, StartEnd and
        optionally Observatory.
    window_minutes : int
        Forward search window from the FAI, in minutes.
    include_inside : bool
        Enable rule 4 (FAI inside an active flare).

    Returns
    -------
    pd.DataFrame
        Copy of ``df_fai_selected`` with 'Time_FAI', 'Associated_Flare', the
        'F_*' flare attributes, 'Association_Type' ("StartTime", "PeakTime",
        "EndTime" or "Inside"), and the minute offsets 'FAI_to_start',
        'FAI_to_peak', 'FAI_to_end' and 'Time_to_flare'.  'Time_to_flare' is
        the offset to the matched reference time, and the offset to the peak
        for "Inside" matches.  Unassociated rows keep None in these columns.

    Raises
    ------
    ValueError
        If no FAI timestamp column can be found.
    """

    df_fai = df_fai_selected.copy()
    df_flares_copy = df_flares.copy()

    # Locate the FAI timestamp column.
    time_col_fai = next((c for c in ('date', 'Unnamed: 0') if c in df_fai.columns), None)
    if time_col_fai is None:
        raise ValueError("No se pudo identificar la columna de tiempo en df_fai_selected")

    print(f"Usando columna de tiempo FAI: {time_col_fai}")

    # Normalise every timestamp to datetime.
    df_fai['Time_FAI'] = pd.to_datetime(df_fai[time_col_fai])
    for c in ['StartTime', 'PeakTime', 'EndTime']:
        df_flares_copy[c] = pd.to_datetime(df_flares_copy[c])

    # Result columns start empty; the flag column starts False.
    for col in ['Associated_Flare', 'Flare_ID', 'F_StartTime', 'F_PeakTime', 'F_EndTime',
                'F_Class', 'F_ClassLetter', 'F_ClassNumber', 'F_ClassGroup', 'F_Observatory', 
                'F_StartPeak', 'F_PeakEnd', 'F_StartEnd', 'Association_Type',
                'Time_to_flare', 'FAI_to_start', 'FAI_to_peak', 'FAI_to_end']:
        df_fai[col] = None
    df_fai['Associated_Flare'] = False

    window = pd.Timedelta(minutes=window_minutes)
    associated_count = 0

    # Flare attributes copied verbatim onto the FAI row: (target, source).
    copied_fields = (
        ('Flare_ID', 'Flare_ID'),
        ('F_StartTime', 'StartTime'),
        ('F_PeakTime', 'PeakTime'),
        ('F_EndTime', 'EndTime'),
        ('F_Class', 'Class'),
        ('F_ClassLetter', 'ClassLetter'),
        ('F_ClassNumber', 'ClassNumber'),
        ('F_ClassGroup', 'ClassGroup'),
    )
    duration_fields = (
        ('F_StartPeak', 'StartPeak'),
        ('F_PeakEnd', 'PeakEnd'),
        ('F_StartEnd', 'StartEnd'),
    )
    # Which offset column feeds 'Time_to_flare' for each association type.
    time_to_flare_source = {
        "StartTime": 'FAI_to_start',
        "PeakTime": 'FAI_to_peak',
        "EndTime": 'FAI_to_end',
        "Inside": 'FAI_to_peak',
    }

    print(f"Procesando {len(df_fai)} alertas FAI...")

    # --- Main loop ---
    for idx, row in df_fai.iterrows():
        fai_time = row['Time_FAI']
        window_end = fai_time + window
        flare = None
        ref_type = None

        # Rules 1-3: first flare whose Start / Peak / End falls inside the
        # forward window, tried in that priority order.
        for ref_col in ('StartTime', 'PeakTime', 'EndTime'):
            ref_times = df_flares_copy[ref_col]
            in_window = (ref_times >= fai_time) & (ref_times <= window_end)
            candidates = df_flares_copy[in_window].sort_values(ref_col)
            if not candidates.empty:
                flare = candidates.iloc[0]
                ref_type = ref_col
                break

        # Rule 4 (optional): flare active at the FAI time.
        if flare is None and include_inside:
            active = (df_flares_copy['StartTime'] <= fai_time) & (df_flares_copy['EndTime'] >= fai_time)
            candidates = df_flares_copy[active].sort_values('StartTime')
            if not candidates.empty:
                flare = candidates.iloc[0]
                ref_type = "Inside"

        if flare is None:
            continue

        # --- Store the association ---
        df_fai.at[idx, 'Associated_Flare'] = True
        for target, source in copied_fields:
            df_fai.at[idx, target] = flare[source]
        df_fai.at[idx, 'F_Observatory'] = flare.get('Observatory', None)
        for target, source in duration_fields:
            df_fai.at[idx, target] = flare[source]
        df_fai.at[idx, 'Association_Type'] = ref_type

        # Offsets from the FAI to the flare's start / peak / end, in minutes.
        df_fai.at[idx, 'FAI_to_start'] = (flare['StartTime'] - fai_time).total_seconds() / 60
        df_fai.at[idx, 'FAI_to_peak'] = (flare['PeakTime'] - fai_time).total_seconds() / 60
        df_fai.at[idx, 'FAI_to_end'] = (flare['EndTime'] - fai_time).total_seconds() / 60

        df_fai.at[idx, 'Time_to_flare'] = df_fai.at[idx, time_to_flare_source[ref_type]]

        associated_count += 1

    # --- Statistics ---
    total_fai = len(df_fai)
    # Guard against an empty FAI table (previously a ZeroDivisionError).
    pct = (associated_count / total_fai) * 100 if total_fai > 0 else 0.0

    print(f"\n--- Estad√≠sticas de Asociaci√≥n FAI-Flare ---")
    print(f"Total FAIs: {total_fai}")
    print(f"FAIs asociados: {associated_count} ({pct:.1f}%)")
    print(f"Ventana: {window_minutes} minutos hacia adelante")
    print(f"Incluir FAIs dentro de flares activos: {include_inside}")

    print("\nDistribuci√≥n por tipo de asociaci√≥n:")
    print(df_fai['Association_Type'].value_counts())

    if associated_count > 0:
        valid_times = df_fai['Time_to_flare'].dropna()
        print(f"Tiempo medio a flare: {valid_times.mean():.1f} min")
        print(f"Tiempo m√≠nimo: {valid_times.min():.1f} min")
        print(f"Tiempo m√°ximo: {valid_times.max():.1f} min")

        class_dist = df_fai[df_fai['Associated_Flare']]['F_Class'].value_counts()
        print("\nDistribuci√≥n por clase:")
        for cls, n in class_dist.items():
            print(f"  {cls}: {n}")

    return df_fai




## Parameters

In [6]:
# --- Dataset selection -------------------------------------------------
# `n` names the data folder ("Data_for_{n}_days"); `new_n` is the new number
# of days and appears in the CSV file names.
n, new_n = 185, 148
fecha_actual = "2025-11-11"

# --- FAI criteria --------------------------------------------------------
window_minutes = 30            # search window used by the flare analyses
fai_temp_range = (7, 14)       # T_cor bounds
fai_em_threshold = 0.005       # lower bound on EM_cor_norm
date_column = "date"           # timestamp column of the GOES table
duration = True                # apply the minimum-duration criterion
FAI_duration = 3               # minimum duration of an active alert
filter_flare_coincidence = True  # drop FAIs between a flare's peak and end

# Which FAI subset is analysed downstream: "all", "true" or "filtered".
method = "filtered"

## Valid Data:

In [7]:
# Folder that holds the exported CSV files.
output_dir = f"Data_for_{n}_days"

# Cleaned GOES samples and the table of valid flares.
csv_path_full = f"{output_dir}/df_full_{new_n}_valid.csv"
csv_path_flares = f"{output_dir}/df_flares_{new_n}_valid.csv"

# Report whether each input file exists; this only prints a notice and does
# not stop the cell.
for csv_path, msg_missing, msg_found in (
    (csv_path_full,
     "‚ö†Ô∏è No se encontr√≥ el archivo GOES: ",
     "‚úÖ Archivo GOES encontrado: "),
    (csv_path_flares,
     "‚ö†Ô∏è No se encontr√≥ el archivo de flares: ",
     "‚úÖ Archivo de flares encontrado: "),
):
    file_exists = os.path.exists(csv_path)
    print(f"{msg_found if file_exists else msg_missing}{csv_path}")

df_full_valid = pd.read_csv(csv_path_full)
df_flares_valid = pd.read_csv(csv_path_flares)

‚úÖ Archivo GOES encontrado: Data_for_185_days/df_full_148_valid.csv
‚úÖ Archivo de flares encontrado: Data_for_185_days/df_flares_148_valid.csv


### Info data valid

In [8]:
# Parse the three flare time columns into datetime values.
for flare_time_col in ("StartTime", "PeakTime", "EndTime"):
    df_flares_valid[flare_time_col] = pd.to_datetime(df_flares_valid[flare_time_col])

In [9]:
# Summarise the flare table: columns, non-null counts and dtypes.
df_flares_valid.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1433 entries, 0 to 1432
Data columns (total 13 columns):
 #   Column       Non-Null Count  Dtype         
---  ------       --------------  -----         
 0   Flare_ID     1433 non-null   object        
 1   StartTime    1433 non-null   datetime64[ns]
 2   PeakTime     1433 non-null   datetime64[ns]
 3   EndTime      1433 non-null   datetime64[ns]
 4   Class        1433 non-null   object        
 5   ClassLetter  1433 non-null   object        
 6   ClassNumber  1433 non-null   float64       
 7   ClassGroup   1433 non-null   object        
 8   Observatory  1433 non-null   object        
 9   StartPeak    1433 non-null   float64       
 10  PeakEnd      1433 non-null   float64       
 11  StartEnd     1433 non-null   float64       
 12  day          1433 non-null   object        
dtypes: datetime64[ns](3), float64(4), object(6)
memory usage: 145.7+ KB


In [10]:
# Summarise the GOES table: columns, non-null counts and dtypes.
df_full_valid.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 52295 entries, 0 to 52294
Data columns (total 14 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   GOES_ID       52295 non-null  object 
 1   date          52295 non-null  object 
 2   day           52295 non-null  object 
 3   observatory   52295 non-null  object 
 4   xrsa          52295 non-null  float64
 5   xrsb          52295 non-null  float64
 6   xrsa_corr     52295 non-null  float64
 7   xrsb_corr     52295 non-null  float64
 8   T_cor         52295 non-null  float64
 9   EM_cor        52295 non-null  float64
 10  T_phot        52295 non-null  float64
 11  EM_phot       52295 non-null  float64
 12  EM_cor_norm   52295 non-null  float64
 13  EM_phot_norm  52295 non-null  float64
dtypes: float64(10), object(4)
memory usage: 5.6+ MB


## Elegir datos GOES que son FAI

In [11]:
# Evaluate the FAI criteria on the GOES table, using the flare table to
# discard FAIs that fall between a flare's peak and end.
result = fai_from_df(
    df_flares=df_flares_valid,
    flare_peak_col="PeakTime",
    flare_end_col="EndTime",
    filter_flare_coincidence=filter_flare_coincidence,
    FAI_duration=FAI_duration,
    duration=duration,
    date_column="date",
    fai_em_threshold=fai_em_threshold,
    fai_temp_range=fai_temp_range,
    df_full=df_full_valid,
    verbose=True,
)

# Unpack the four frames returned by fai_from_df:
#   df_fai_full      every GOES row with the evaluation columns
#   df_fai_all       every candidate
#   df_fai_true      FAIs that meet the minimum duration
#   df_fai_filtered  true FAIs outside the peak-to-end interval of flares
df_fai_full, df_fai_all, df_fai_true, df_fai_filtered = (
    result[key]
    for key in ("df_fai_full", "df_fai_all", "df_fai_true", "df_fai_filtered")
)



‚úÖ Se encontraron 27589 puntos candidatos a FAI (T_cor (7, 14), EM_cor_norm > 0.005)
‚úÖ Se encontraron 15575 FAI verdaderos con duraci√≥n m√≠nima de 3 minutos consecutivos.
‚úÖ Se filtraron 1637 FAI encontrados entre el PeaK-End de flares, quedan 13938 FAI filtrados.



## Método all, true o filtered

In [12]:
# Available FAI selections (chosen through `method`):
#   "all"      -> every FAI that meets the EM and T criteria
#   "true"     -> EM and T criteria plus the minimum active duration
#   "filtered" -> as "true", excluding FAIs between the peak and end of a flare
method_mapping = dict(
    all=("df_fai_all", df_fai_all),
    true=("df_fai_true", df_fai_true),
    filtered=("df_fai_filtered", df_fai_filtered),
)

# Reject unknown method names before selecting.
if method not in method_mapping:
    raise ValueError(f"M√©todo '{method}' no reconocido. Use 'all', 'true' o 'filtered'")

df_name, df_fai_selected_calculate = method_mapping[method]
print(f"M√©todo elegido: {method} ‚Üí {df_name}")

M√©todo elegido: filtered ‚Üí df_fai_filtered


## info df's

In [13]:
df_fai_full.info()     # All GOES data with the evaluation columns


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 52295 entries, 0 to 52294
Data columns (total 19 columns):
 #   Column               Non-Null Count  Dtype         
---  ------               --------------  -----         
 0   GOES_ID              52295 non-null  object        
 1   date                 52295 non-null  datetime64[ns]
 2   day                  52295 non-null  object        
 3   observatory          52295 non-null  object        
 4   xrsa                 52295 non-null  float64       
 5   xrsb                 52295 non-null  float64       
 6   xrsa_corr            52295 non-null  float64       
 7   xrsb_corr            52295 non-null  float64       
 8   T_cor                52295 non-null  float64       
 9   EM_cor               52295 non-null  float64       
 10  T_phot               52295 non-null  float64       
 11  EM_phot              52295 non-null  float64       
 12  EM_cor_norm          52295 non-null  float64       
 13  EM_phot_norm         52295 non-

In [14]:
df_fai_all.info()       # All candidates


<class 'pandas.core.frame.DataFrame'>
Index: 27589 entries, 8 to 52288
Data columns (total 19 columns):
 #   Column               Non-Null Count  Dtype         
---  ------               --------------  -----         
 0   GOES_ID              27589 non-null  object        
 1   date                 27589 non-null  datetime64[ns]
 2   day                  27589 non-null  object        
 3   observatory          27589 non-null  object        
 4   xrsa                 27589 non-null  float64       
 5   xrsb                 27589 non-null  float64       
 6   xrsa_corr            27589 non-null  float64       
 7   xrsb_corr            27589 non-null  float64       
 8   T_cor                27589 non-null  float64       
 9   EM_cor               27589 non-null  float64       
 10  T_phot               27589 non-null  float64       
 11  EM_phot              27589 non-null  float64       
 12  EM_cor_norm          27589 non-null  float64       
 13  EM_phot_norm         27589 non-null 

In [15]:
df_fai_true.info()      # FAIs with the minimum duration



<class 'pandas.core.frame.DataFrame'>
Index: 15575 entries, 10 to 52288
Data columns (total 19 columns):
 #   Column               Non-Null Count  Dtype         
---  ------               --------------  -----         
 0   GOES_ID              15575 non-null  object        
 1   date                 15575 non-null  datetime64[ns]
 2   day                  15575 non-null  object        
 3   observatory          15575 non-null  object        
 4   xrsa                 15575 non-null  float64       
 5   xrsb                 15575 non-null  float64       
 6   xrsa_corr            15575 non-null  float64       
 7   xrsb_corr            15575 non-null  float64       
 8   T_cor                15575 non-null  float64       
 9   EM_cor               15575 non-null  float64       
 10  T_phot               15575 non-null  float64       
 11  EM_phot              15575 non-null  float64       
 12  EM_cor_norm          15575 non-null  float64       
 13  EM_phot_norm         15575 non-null

In [16]:
df_fai_filtered.info()  # FAIs not coinciding with the end of flares

<class 'pandas.core.frame.DataFrame'>
Index: 13938 entries, 10 to 52288
Data columns (total 19 columns):
 #   Column               Non-Null Count  Dtype         
---  ------               --------------  -----         
 0   GOES_ID              13938 non-null  object        
 1   date                 13938 non-null  datetime64[ns]
 2   day                  13938 non-null  object        
 3   observatory          13938 non-null  object        
 4   xrsa                 13938 non-null  float64       
 5   xrsb                 13938 non-null  float64       
 6   xrsa_corr            13938 non-null  float64       
 7   xrsb_corr            13938 non-null  float64       
 8   T_cor                13938 non-null  float64       
 9   EM_cor               13938 non-null  float64       
 10  T_phot               13938 non-null  float64       
 11  EM_phot              13938 non-null  float64       
 12  EM_cor_norm          13938 non-null  float64       
 13  EM_phot_norm         13938 non-null

In [17]:
# Preview the first rows of the FAI subset chosen through `method`.
df_fai_selected_calculate.head()

Unnamed: 0,GOES_ID,date,day,observatory,xrsa,xrsb,xrsa_corr,xrsb_corr,T_cor,EM_cor,T_phot,EM_phot,EM_cor_norm,EM_phot_norm,FAI_alert,FAI_true,delta_min,group_id,duration_from_start
10,GOES_10,2000-06-06 00:17:00,2000-06-06,GOES-16,4.194724e-08,1e-06,2.585677e-08,2.906549e-07,11.058271,1.937464e+47,9.953444,5.260247e+47,0.019375,0.052602,True,True,1.0,3,2.0
13,GOES_13,2000-06-06 00:25:00,2000-06-06,GOES-16,4.753495e-08,1e-06,8.936986e-09,1.561961e-07,9.086959,1.344306e+47,8.010274,4.128245e+47,0.013443,0.041282,True,True,1.0,4,2.0
14,GOES_14,2000-06-06 00:26:00,2000-06-06,GOES-16,5.612609e-08,1e-06,1.455322e-08,2.01082e-07,10.094641,1.496619e+47,8.974755,4.320921e+47,0.014966,0.043209,True,True,1.0,4,3.0
15,GOES_15,2000-06-06 00:27:00,2000-06-06,GOES-16,6.899128e-08,2e-06,2.759433e-08,3.362277e-07,10.674135,2.33536e+47,9.55528,6.500427e+47,0.023354,0.065004,True,True,1.0,4,4.0
16,GOES_16,2000-06-06 00:28:00,2000-06-06,GOES-16,8.993616e-08,2e-06,4.93028e-08,4.731387e-07,11.832619,2.934333e+47,10.791647,7.563609e+47,0.029343,0.075636,True,True,1.0,4,5.0


In [18]:
# Preview the first rows of the full candidate set for comparison.
df_fai_all.head()

Unnamed: 0,GOES_ID,date,day,observatory,xrsa,xrsb,xrsa_corr,xrsb_corr,T_cor,EM_cor,T_phot,EM_phot,EM_cor_norm,EM_phot_norm,FAI_alert,FAI_true,delta_min,group_id,duration_from_start
8,GOES_8,2000-06-06 00:15:00,2000-06-06,GOES-16,6.398151e-08,1e-06,5.200526e-08,3.744052e-07,13.319638,2.088278e+47,12.506248,4.879989e+47,0.020883,0.0488,True,False,1.0,3,0.0
9,GOES_9,2000-06-06 00:16:00,2000-06-06,GOES-16,5.146598e-08,1e-06,3.738324e-08,3.462321e-07,12.010261,2.115799e+47,10.99046,5.388108e+47,0.021158,0.053881,True,False,1.0,3,1.0
10,GOES_10,2000-06-06 00:17:00,2000-06-06,GOES-16,4.194724e-08,1e-06,2.585677e-08,2.906549e-07,11.058271,1.937464e+47,9.953444,5.260247e+47,0.019375,0.052602,True,True,1.0,3,2.0
11,GOES_11,2000-06-06 00:23:00,2000-06-06,GOES-16,4.063336e-08,1e-06,4.950806e-09,1.062962e-07,8.298321,1.054738e+47,7.279352,3.402429e+47,0.010547,0.034024,True,False,6.0,4,0.0
12,GOES_12,2000-06-06 00:24:00,2000-06-06,GOES-16,4.321096e-08,1e-06,8.710359e-09,1.461166e-07,9.254645,1.224493e+47,8.167912,3.721751e+47,0.012245,0.037218,True,False,1.0,4,1.0


In [19]:
# Tally of FAI_alert values among all candidates.
df_fai_all["FAI_alert"].value_counts()


FAI_alert
True    27589
Name: count, dtype: int64

In [20]:
# Tally of FAI_true values among all candidates.
df_fai_all["FAI_true"].value_counts()


FAI_true
True     15575
False    12014
Name: count, dtype: int64

In [21]:
# Tally of FAI_true values in the minimum-duration subset.
df_fai_true["FAI_true"].value_counts()


FAI_true
True    15575
Name: count, dtype: int64

In [22]:
# Tally of FAI_true values in the flare-filtered subset.
df_fai_filtered["FAI_true"].value_counts()


FAI_true
True    13938
Name: count, dtype: int64

## Carpeta para guardar el FAI

In [23]:
def _report_or_create_dir(path):
    """Create `path` when nothing exists there yet and print which case applied."""
    if not os.path.exists(path):
        os.makedirs(path, exist_ok=True)
        print(f"üìÅ Carpeta de an√°lisis creada: {path}")
    else:
        print(f"‚ö†Ô∏è La carpeta de an√°lisis ya exist√≠a: {path}")


# Top-level folder that holds every FAI analysis
analysis_dir = "Analysis_FAI"
_report_or_create_dir(analysis_dir)

# Sub-folder whose name encodes the FAI criteria in use
# (temperature range, EM threshold and minimum duration)
analysis_esp = os.path.join(
    analysis_dir,
    f"Analysis_FAI_T{fai_temp_range[0]}-{fai_temp_range[1]}_EM{fai_em_threshold}_dur{FAI_duration}min",
)
_report_or_create_dir(analysis_esp)

‚ö†Ô∏è La carpeta de an√°lisis ya exist√≠a: Analysis_FAI
‚ö†Ô∏è La carpeta de an√°lisis ya exist√≠a: Analysis_FAI/Analysis_FAI_T7-14_EM0.005_dur3min


## Guardar archivos

In [24]:
# Tag shared by every output file name; it encodes the FAI criteria in use
# (temperature range, EM threshold and minimum duration).
fai_params_tag = (
    f"T{fai_temp_range[0]}-{fai_temp_range[1]}_EM{fai_em_threshold}_dur{FAI_duration}min"
)

# Output paths, all inside the parameter-specific analysis folder
file_full = os.path.join(analysis_esp, f"df_fai_full_{fai_params_tag}.csv")
file_all = os.path.join(analysis_esp, f"df_fai_all_{fai_params_tag}.csv")
file_true = os.path.join(analysis_esp, f"df_fai_true_{fai_params_tag}.csv")
file_filtered = os.path.join(analysis_esp, f"df_fai_filtered_{fai_params_tag}.csv")

# Write each DataFrame to CSV without the index column
df_fai_full.to_csv(file_full, index=False)
df_fai_all.to_csv(file_all, index=False)
df_fai_true.to_csv(file_true, index=False)
df_fai_filtered.to_csv(file_filtered, index=False)

# Report where each file was written. df_fai_full is the complete GOES table
# with the evaluation columns, not the candidate subset, so it gets its own
# message instead of repeating the "candidatos" one used for df_fai_all.
print(f"‚úÖ Todos los datos GOES evaluados guardados correctamente en:\n{file_full}")
print(f"‚úÖ Todos los FAI candidatos guardados correctamente en:\n{file_all}")
print(f"‚úÖ Todos los FAI verdaderos guardados correctamente en:\n{file_true}")
print(f"‚úÖ Todos los FAI filtrados guardados correctamente en:\n{file_filtered}")

‚úÖ Todos los FAI candidatos guardados correctamente en:
Analysis_FAI/Analysis_FAI_T7-14_EM0.005_dur3min/df_fai_full_T7-14_EM0.005_dur3min.csv
‚úÖ Todos los FAI candidatos guardados correctamente en:
Analysis_FAI/Analysis_FAI_T7-14_EM0.005_dur3min/df_fai_all_T7-14_EM0.005_dur3min.csv
‚úÖ Todos los FAI verdaderos guardados correctamente en:
Analysis_FAI/Analysis_FAI_T7-14_EM0.005_dur3min/df_fai_true_T7-14_EM0.005_dur3min.csv
‚úÖ Todos los FAI filtrados guardados correctamente en:
Analysis_FAI/Analysis_FAI_T7-14_EM0.005_dur3min/df_fai_filtered_T7-14_EM0.005_dur3min.csv


## Cálculo de tiempos de anticipación

In [25]:

# Run anticipation_fai_analysis_v2 on the selected FAI subset and the valid flares.
df_anticipation_time = anticipation_fai_analysis_v2(
    df_fai_selected=df_fai_selected_calculate,
    df_flare_data=df_flares_valid,
    start_col="StartTime",
    peak_col="PeakTime",
    end_col="EndTime",
    # time span, counted back from the peak, in which FAIs are searched
    window_minutes=window_minutes,
    # time span (3 h), counted back from the peak, in which flares are searched
    max_prev_flare_minutes=180,
)

# Output path: the file name encodes the selection method, the window and the FAI criteria
file_anticipation_time = os.path.join(
    analysis_esp,
    f"df_anticipation_time_{method}_(W_{window_minutes})"
    f"_T{fai_temp_range[0]}-{fai_temp_range[1]}_EM{fai_em_threshold}_dur{FAI_duration}min.csv",
)

# Save the anticipation DataFrame without the index column
df_anticipation_time.to_csv(file_anticipation_time, index=False)

print(f"‚úÖ df_anticipation_time guardado correctamente en:\n{file_anticipation_time}")


üìä RESUMEN DE AN√ÅLISIS FAI‚ÄìFLARE
üîπ Total de FAIs analizados: 13938
üîπ Total de flares analizados: 1433
üîπ FAIs encontrados dentro de ventanas: 5645 (40.5%)
üîπ Promedio de FAIs por flare: 3.94
------------------------------------------------------
üìà ESTAD√çSTICAS DE ASOCIACI√ìN:
   ‚Ä¢ Flares con ‚â•1 FAI (cualquier ventana): 995 (69.4%)
   ‚Ä¢ Flares SIN FAI (ninguna ventana): 438 (30.6%)
   ‚Ä¢ Flares con FAI ANTES del inicio: 601 (41.9%)
   ‚Ä¢ Flares SIN FAI antes del inicio: 832 (58.1%)
   ‚Ä¢ Flares con FAI ANTES del pico: 995 (69.4%)
   ‚Ä¢ Flares SIN FAI antes del pico: 438 (30.6%)
------------------------------------------------------
üìç DISTRIBUCI√ìN DE FAIs POR VENTANA:
   ‚Ä¢ Antes del inicio (WStart): 2437 (17.5%)
   ‚Ä¢ Antes del pico (WPeak):    5645 (40.5%)
   ‚Ä¢ Entre inicio(inclusive) y pico:          3208 (23.0%)
   ‚Ä¢ Entre pico(inclusive) y fin(inclusive):  0 (0.0%)
   ‚Ä¢ Entre inicio(inclusive) y fin(inclusive):3208 (23.0%)
------------------

In [26]:
# Column summary of the anticipation DataFrame.
df_anticipation_time.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1433 entries, 0 to 1432
Data columns (total 26 columns):
 #   Column                      Non-Null Count  Dtype         
---  ------                      --------------  -----         
 0   Flare_ID                    1433 non-null   object        
 1   StartTime                   1433 non-null   datetime64[ns]
 2   PeakTime                    1433 non-null   datetime64[ns]
 3   EndTime                     1433 non-null   datetime64[ns]
 4   Class                       1433 non-null   object        
 5   ClassLetter                 1433 non-null   object        
 6   ClassNumber                 1433 non-null   float64       
 7   ClassGroup                  1433 non-null   object        
 8   Observatory                 1433 non-null   object        
 9   StartPeak                   1433 non-null   float64       
 10  PeakEnd                     1433 non-null   float64       
 11  StartEnd                    1433 non-null   float64     

In [27]:
# Number of rows in df_anticipation_time whose FAIalerts_W equals 0.
len(df_anticipation_time[df_anticipation_time["FAIalerts_W"] == 0])


438

### Verificación de que no hay alertas FAI entre el Peak y el End de los flares

In [28]:
# First two rows after sorting by FAIalerts_PeakEnd in descending order.
df_anticipation_time.sort_values(by="FAIalerts_PeakEnd", ascending=False)[:2]

Unnamed: 0,Flare_ID,StartTime,PeakTime,EndTime,Class,ClassLetter,ClassNumber,ClassGroup,Observatory,StartPeak,...,FAIalerts_StartPeak,FAIalerts_PeakEnd,FAIalerts_startEnd,AnticipationStart,AnticipationPeak,Peak_to_lastFAI,Time_since_prev_flare_end,Time_since_prev_flare_peak,RelAnticipation_Peak,RelAnticipation_Start
0,Flare_20000606_0,2000-06-06 00:28:00,2000-06-06 00:43:00,2000-06-06 00:55:00,C4.6,C,4.6,C1-4.9,GOES,15.0,...,10,0,10,11.0,26.0,6.0,,,1.733333,0.733333
1,Flare_20000606_1,2000-06-06 01:30:00,2000-06-06 01:49:00,2000-06-06 02:01:00,C2.4,C,2.4,C1-4.9,GOES,19.0,...,7,0,7,,16.0,1.0,54.0,66.0,0.842105,


In [29]:
# Tally of FAIalerts_PeakEnd values across all rows.
df_anticipation_time["FAIalerts_PeakEnd"].value_counts()

FAIalerts_PeakEnd
0    1433
Name: count, dtype: int64

## Ordenar por tiempo desde el fin del flare anterior

In [30]:
# First four rows after sorting by Time_since_prev_flare_end in ascending order.
df_anticipation_time.sort_values(by="Time_since_prev_flare_end", ascending=True)[:4]

Unnamed: 0,Flare_ID,StartTime,PeakTime,EndTime,Class,ClassLetter,ClassNumber,ClassGroup,Observatory,StartPeak,...,FAIalerts_StartPeak,FAIalerts_PeakEnd,FAIalerts_startEnd,AnticipationStart,AnticipationPeak,Peak_to_lastFAI,Time_since_prev_flare_end,Time_since_prev_flare_peak,RelAnticipation_Peak,RelAnticipation_Start
1344,Flare_20241230_1918,2024-12-30 04:29:00,2024-12-30 04:31:00,2024-12-30 04:34:00,X1.1,X,1.1,X1-4.9,GOES,2.0,...,0,0,0,,,,3.0,17.0,,
1139,Flare_20240515_1629,2024-05-15 08:13:00,2024-05-15 08:16:00,2024-05-15 08:20:00,C9.9,C,9.9,C5-9.9,GOES,3.0,...,0,0,0,,,,3.0,34.0,,
1166,Flare_20240531_1659,2024-05-31 06:34:00,2024-05-31 06:37:00,2024-05-31 06:39:00,C2.2,C,2.2,C1-4.9,GOES,3.0,...,0,0,0,,,,3.0,16.0,,
302,Flare_20030610_463,2003-06-10 16:28:00,2003-06-10 16:30:00,2003-06-10 16:32:00,M3.9,M,3.9,M1-4.9,GOES,2.0,...,0,0,0,,,,3.0,6.0,,


## FAIs asociados a flares

### Busca solo dentro de las ventanas

In [31]:
# Determine which FAIs are associated with a flare and which are not.
# Only flares that come after the FAI are searched.
df_fai_assoc = associate_fai_to_flare_dataframes(
    df_fai_selected=df_fai_selected_calculate,
    df_flares=df_flares_valid,
    window_minutes=window_minutes,
    include_inside=False,
)

# Output path: the file name encodes the selection method, the window and the FAI criteria
file_fai_assoc = os.path.join(
    analysis_esp,
    f"df_fai_assoc_{method}_(W_{window_minutes})"
    f"_T{fai_temp_range[0]}-{fai_temp_range[1]}_EM{fai_em_threshold}_dur{FAI_duration}min.csv",
)

# Save the association DataFrame without the index column
df_fai_assoc.to_csv(file_fai_assoc, index=False)

print(f"‚úÖ df_fai_assoc guardado correctamente en:\n{file_fai_assoc}")


Usando columna de tiempo FAI: date
Procesando 13938 alertas FAI...

--- Estad√≠sticas de Asociaci√≥n FAI-Flare ---
Total FAIs: 13938
FAIs asociados: 5533 (39.7%)
Ventana: 30 minutos hacia adelante
Incluir FAIs dentro de flares activos: False

Distribuci√≥n por tipo de asociaci√≥n:
Association_Type
StartTime    3082
PeakTime     2451
Name: count, dtype: int64
Tiempo medio a flare: 10.6 min
Tiempo m√≠nimo: 0.0 min
Tiempo m√°ximo: 30.0 min

Distribuci√≥n por clase:
  M1.0: 261
  M1.3: 137
  M1.4: 120
  C2.4: 95
  C5.5: 93
  X1.0: 93
  C3.4: 92
  M1.2: 89
  C5.1: 88
  C2.3: 80
  M1.5: 75
  C1.9: 75
  C4.0: 75
  C2.9: 73
  M1.6: 72
  C2.6: 66
  C3.5: 66
  C4.4: 65
  C3.1: 64
  C3.3: 64
  C3.2: 63
  C7.3: 61
  C3.6: 60
  C7.0: 60
  C1.7: 60
  X1.1: 60
  C3.0: 59
  M1.1: 59
  C3.7: 57
  M2.9: 56
  C5.9: 55
  C1.6: 53
  C5.7: 53
  C5.6: 51
  M2.1: 51
  C2.8: 51
  M1.7: 51
  M1.9: 50
  C6.8: 50
  C8.9: 49
  C2.1: 49
  C4.2: 48
  C7.5: 48
  C4.6: 47
  C3.9: 46
  C4.3: 46
  X1.9: 46
  C1.4: 45
  

### incluye FAIs dentro de flares activos

In [32]:
# Same association as df_fai_assoc, but this run also includes FAIs that fall
# inside an active flare (include_inside=True).
df_fai_assoc2 = associate_fai_to_flare_dataframes(
    df_fai_selected=df_fai_selected_calculate,
    df_flares=df_flares_valid,
    window_minutes=window_minutes,
    include_inside=True,
)

# Output path: the file name encodes the selection method, the window and the FAI criteria
file_fai_assoc2 = os.path.join(
    analysis_esp,
    f"df_fai_assoc2_{method}_(W_{window_minutes})"
    f"_T{fai_temp_range[0]}-{fai_temp_range[1]}_EM{fai_em_threshold}_dur{FAI_duration}min.csv",
)

# Save the association DataFrame without the index column
df_fai_assoc2.to_csv(file_fai_assoc2, index=False)

# The message names df_fai_assoc2 (the object actually written) so the log
# cannot be confused with the include_inside=False run saved as df_fai_assoc.
print(f"‚úÖ df_fai_assoc2 guardado correctamente en:\n{file_fai_assoc2}")


Usando columna de tiempo FAI: date
Procesando 13938 alertas FAI...

--- Estad√≠sticas de Asociaci√≥n FAI-Flare ---
Total FAIs: 13938
FAIs asociados: 5645 (40.5%)
Ventana: 30 minutos hacia adelante
Incluir FAIs dentro de flares activos: True

Distribuci√≥n por tipo de asociaci√≥n:
Association_Type
StartTime    3082
PeakTime     2451
Inside        112
Name: count, dtype: int64
Tiempo medio a flare: 11.3 min
Tiempo m√≠nimo: 0.0 min
Tiempo m√°ximo: 72.0 min

Distribuci√≥n por clase:
  M1.0: 281
  M1.3: 137
  M1.4: 120
  C2.4: 95
  C5.5: 93
  X1.0: 93
  C3.4: 92
  M1.2: 90
  C5.1: 88
  C3.1: 81
  C2.3: 80
  C1.9: 75
  C4.0: 75
  M1.5: 75
  C2.9: 73
  M1.6: 72
  C3.7: 67
  C2.6: 66
  C3.5: 66
  X1.1: 65
  C4.4: 65
  C3.3: 65
  C2.1: 65
  C3.2: 63
  C7.3: 61
  C1.7: 61
  C3.6: 60
  C7.0: 60
  M1.1: 59
  C3.0: 59
  M2.9: 56
  C5.9: 55
  C1.6: 53
  C5.7: 53
  M2.1: 51
  C5.6: 51
  C2.8: 51
  M1.7: 51
  C6.8: 50
  M1.9: 50
  C8.9: 49
  C4.2: 48
  C7.5: 48
  C4.6: 47
  C3.9: 46
  C4.3: 46
  X1.9:

In [33]:
# Rows at integer positions 10, 11 and 12 of df_fai_assoc.
df_fai_assoc.iloc[10:13]

Unnamed: 0,GOES_ID,date,day,observatory,xrsa,xrsb,xrsa_corr,xrsb_corr,T_cor,EM_cor,...,F_ClassGroup,F_Observatory,F_StartPeak,F_PeakEnd,F_StartEnd,Association_Type,Time_to_flare,FAI_to_start,FAI_to_peak,FAI_to_end
22,GOES_22,2000-06-06 00:34:00,2000-06-06,GOES-16,2.892952e-07,3e-06,1.711936e-07,1e-06,12.951876,7.533189e+47,...,C1-4.9,GOES,15.0,12.0,27.0,PeakTime,9.0,-6.0,9.0,21.0
23,GOES_23,2000-06-06 00:35:00,2000-06-06,GOES-16,2.968308e-07,3e-06,1.530787e-07,1e-06,12.485306,7.606542e+47,...,C1-4.9,GOES,15.0,12.0,27.0,PeakTime,8.0,-7.0,8.0,20.0
24,GOES_24,2000-06-06 00:36:00,2000-06-06,GOES-16,2.911874e-07,4e-06,1.062293e-07,1e-06,11.211785,7.592916e+47,...,C1-4.9,GOES,15.0,12.0,27.0,PeakTime,7.0,-8.0,7.0,19.0


In [34]:
# Column summary of df_fai_assoc2.
df_fai_assoc2.info()

<class 'pandas.core.frame.DataFrame'>
Index: 13938 entries, 10 to 52288
Data columns (total 38 columns):
 #   Column               Non-Null Count  Dtype         
---  ------               --------------  -----         
 0   GOES_ID              13938 non-null  object        
 1   date                 13938 non-null  datetime64[ns]
 2   day                  13938 non-null  object        
 3   observatory          13938 non-null  object        
 4   xrsa                 13938 non-null  float64       
 5   xrsb                 13938 non-null  float64       
 6   xrsa_corr            13938 non-null  float64       
 7   xrsb_corr            13938 non-null  float64       
 8   T_cor                13938 non-null  float64       
 9   EM_cor               13938 non-null  float64       
 10  T_phot               13938 non-null  float64       
 11  EM_phot              13938 non-null  float64       
 12  EM_cor_norm          13938 non-null  float64       
 13  EM_phot_norm         13938 non-null