## Packages

In [41]:
from datetime import datetime, timedelta
import pandas as pd
import os
from sunpy.net import Fido, attrs as a
from sunpy.timeseries import TimeSeries
from sunpy.timeseries.sources.goes import XRSTimeSeries
import astropy.units as u
from sunkit_instruments.goes_xrs import calculate_temperature_em
import matplotlib.pyplot as plt
import numpy as np
from sunpy.data import manager
import netCDF4 as nc
import os
import pandas as pd
from datetime import datetime, timedelta
import copy
from matplotlib import colormaps
list(colormaps)
#from colorspacious import cspace_converter
import matplotlib as mpl
from matplotlib.colors import LinearSegmentedColormap
from matplotlib import cm
from matplotlib.colors import ListedColormap
import matplotlib.dates as mdates
from matplotlib.ticker import LogFormatter
from matplotlib.ticker import LogFormatterMathtext
from matplotlib.ticker import ScalarFormatter
from matplotlib.ticker import FuncFormatter
from IPython.display import HTML, display
import matplotlib.pyplot as plt
import seaborn as sns
import re
import glob

## Funciones gráficas

In [42]:
# Plot with ticks every minute
def plot_fai_and_flare_lines(df_combined, df_fai_selected, df_fai_all, df_flare_data,
                              flare_start_time, flare_end_time, graphics_dir,
                              ymin=1e-9, ymax=1e-4):
    """
    Plot XRS A and B on a logarithmic scale with FAI and flare markers.

    The plot covers the closed window [flare_start_time, flare_end_time].
    Samples are drawn against their positional index (0..N-1) instead of real
    timestamps; every tick and vertical marker is placed at the index of the
    sample whose 'date' is closest to the requested time.

    Parameters
    ----------
    df_combined : pandas.DataFrame
        Time series with columns 'date', 'xrsa' and 'xrsb'.
    df_fai_selected : pandas.DataFrame
        FAI events selected by a method (column 'date'); drawn in red.
    df_fai_all : pandas.DataFrame
        All candidate FAI events (column 'date'); drawn in pink.
    df_flare_data : pandas.DataFrame
        Flare list with 'StartTime', 'PeakTime', 'EndTime' and, optionally,
        'Class'. Flares are picked by 'PeakTime' falling inside the window.
    flare_start_time, flare_end_time : datetime-like
        Window limits; anything accepted by ``pd.to_datetime``.
    graphics_dir : str
        Folder where the PNG is written. It is created if it does not exist.
    ymin, ymax : float
        Y-axis limits; ``ymin`` also anchors the flare-class annotation.

    Returns
    -------
    None
        The figure is saved as ``FAI_<YYYYmmdd_HHMM>_<HHMM>.png``, shown and
        closed. Nothing is plotted when the window holds no XRS samples.

    Notes
    -----
    The input DataFrames are not modified: date parsing happens on copies.
    ``plt.rcParams`` is updated globally, so font sizes persist after the call.
    """
    # Ensure time types
    flare_start_time = pd.to_datetime(flare_start_time)
    flare_end_time = pd.to_datetime(flare_end_time)

    print(f"Buscando datos entre: {flare_start_time} y {flare_end_time}")

    # Parse the time columns on copies so the caller's frames keep their dtypes.
    df_combined = df_combined.assign(date=pd.to_datetime(df_combined['date']))
    df_fai_selected = df_fai_selected.assign(date=pd.to_datetime(df_fai_selected['date']))
    df_fai_all = df_fai_all.assign(date=pd.to_datetime(df_fai_all['date']))
    df_flare_data = df_flare_data.assign(
        StartTime=pd.to_datetime(df_flare_data['StartTime']),
        PeakTime=pd.to_datetime(df_flare_data['PeakTime']),
        EndTime=pd.to_datetime(df_flare_data['EndTime']),
    )

    # Diagnostic output: date coverage of each input.
    print(f"Rango df_combined: {df_combined['date'].min()} a {df_combined['date'].max()}")
    print(f"Rango df_fai_selected: {df_fai_selected['date'].min()} a {df_fai_selected['date'].max()}")
    print(f"Rango df_flare_data PeakTime: {df_flare_data['PeakTime'].min()} a {df_flare_data['PeakTime'].max()}")

    def in_window(times):
        """Boolean mask of the timestamps inside the closed plotting window."""
        return times.between(flare_start_time, flare_end_time)

    df_plot = df_combined.loc[in_window(df_combined['date'])].copy()
    df_fai_interval = df_fai_selected.loc[in_window(df_fai_selected['date'])].copy()
    df_fai_all_interval = df_fai_all.loc[in_window(df_fai_all['date'])].copy()

    # Diagnostic output: how many rows fell inside the window.
    print(f"Datos en df_plot: {len(df_plot)}")
    print(f"Datos en df_fai_interval: {len(df_fai_interval)}")

    # Nothing to draw: warn and leave before touching matplotlib or the disk.
    if len(df_plot) == 0:
        print(f"¡ADVERTENCIA! No hay datos en df_combined para el rango {flare_start_time} a {flare_end_time}")
        return

    plt.rcParams.update({
        "axes.titlesize": 16, "axes.labelsize": 16,
        "xtick.labelsize": 15, "ytick.labelsize": 15,
        "legend.fontsize": 13
    })

    fig, ax = plt.subplots(figsize=(13, 5))

    plot_dates = df_plot['date']

    def nearest_position(timestamp):
        """Positional index of the plotted sample closest to ``timestamp``."""
        return int(np.abs(plot_dates - timestamp).argmin())

    # The curves use the positional index as x, so all markers below are
    # converted from timestamps to positions with nearest_position().
    x_vals = range(len(df_plot))
    ax.plot(x_vals, df_plot['xrsa'], label='XRS A', color='blue', linewidth=1)
    ax.plot(x_vals, df_plot['xrsb'], label='XRS B', color='red', linewidth=1)

    # Log scale and Y-axis limits
    ax.set_yscale('log')
    ax.set_ylim(ymin, ymax)
    ax.set_ylabel('X-ray Flux [W/m²]')
    ax.set_xlabel(f"Time [UTC] – {flare_start_time:%d %b %Y}")
    ax.set_title(f'XRS A and B with FAI and Flare Times\n{flare_start_time:%Y-%m-%d %H:%M} to {flare_end_time:%H:%M}')
    ax.grid(True, alpha=0.3)

    # -----------------------
    # Configure X axis ticks
    # -----------------------
    # Labelled major ticks every 10 minutes, unlabelled minor ticks every minute.
    major_times = pd.date_range(start=flare_start_time.ceil('10min'), end=flare_end_time, freq='10min')
    minor_times = pd.date_range(start=flare_start_time.ceil('1min'), end=flare_end_time, freq='1min')
    ax.set_xticks([nearest_position(t) for t in major_times])
    ax.set_xticklabels([t.strftime('%H:%M') for t in major_times])
    ax.set_xticks([nearest_position(t) for t in minor_times], minor=True)

    # Every marker carries its label; duplicates are collapsed when the legend
    # is built, so an entry appears as soon as one such line is drawn.

    # -----------------------
    # Plot all FAI candidates (background)
    # -----------------------
    for fai_time in df_fai_all_interval['date']:
        ax.axvline(x=nearest_position(fai_time), color='#FF69B4', linestyle='-',
                   linewidth=1.5, alpha=0.7, label='FAI Candidate')

    # Red vertical lines: selected FAI events
    for fai_time in df_fai_interval['date']:
        ax.axvline(x=nearest_position(fai_time), color='red', linestyle='-',
                   linewidth=1.5, alpha=0.7, label='FAI Alert')

    # Flares whose peak lies inside the window
    flares_in_interval = df_flare_data[in_window(df_flare_data['PeakTime'])]

    print(f"Fulguraciones encontradas en el intervalo: {len(flares_in_interval)}")

    # (column, colour, linestyle, linewidth, legend label) for each flare time
    flare_markers = (
        ('StartTime', 'green', (0, (5, 3)), 2, 'Flare Start'),
        ('PeakTime', 'blue', (0, (5, 3)), 2, 'Flare Peak'),
        ('EndTime', 'orange', '--', 3, 'Flare End'),
    )

    for _, flare in flares_in_interval.iterrows():
        for column, color, linestyle, linewidth, label in flare_markers:
            marker_time = flare[column]
            # Start/End may fall outside the window even though the peak is inside.
            if flare_start_time <= marker_time <= flare_end_time:
                ax.axvline(x=nearest_position(marker_time), color=color, linestyle=linestyle,
                           linewidth=linewidth, alpha=0.8, label=label)

        # Flare-class box with an arrow pointing at the peak position
        flare_class = flare.get('Class', '')
        peak_time = flare['PeakTime']
        has_class = pd.notna(flare_class) and flare_class != ''
        if has_class and (flare_start_time <= peak_time <= flare_end_time):
            peak_pos = nearest_position(peak_time)
            ax.annotate(flare_class,
                        xy=(peak_pos, ymin * 5),            # arrow target
                        xytext=(peak_pos + 1, ymin * 15),   # text box, up and to the right
                        fontsize=9,
                        color='darkblue',
                        rotation=0,
                        ha='left',
                        va='center',
                        bbox=dict(boxstyle="round,pad=0.3", fc="lightyellow", ec="darkblue", lw=1, alpha=0.8),
                        arrowprops=dict(arrowstyle='->',
                                        color='darkblue',
                                        lw=1,
                                        alpha=0.7,
                                        shrinkA=5,   # gap at the text end
                                        shrinkB=0))  # no gap at the target end

    # One legend entry per distinct label, in order of first appearance.
    handles, labels = ax.get_legend_handles_labels()
    by_label = {}
    for handle, label in zip(handles, labels):
        by_label.setdefault(label, handle)
    ax.legend(by_label.values(), by_label.keys(), loc="upper right")

    fig.tight_layout()

    # The target folder is received as a parameter; make sure it exists.
    if graphics_dir:
        os.makedirs(graphics_dir, exist_ok=True)

    output_name = f"FAI_{flare_start_time:%Y%m%d_%H%M}_{flare_end_time:%H%M}.png"
    output_path = os.path.join(graphics_dir, output_name)
    fig.savefig(output_path, dpi=300, bbox_inches="tight")
    plt.show()
    plt.close(fig)  # avoid accumulating open figures when called in a loop


# Gráfica de flux vs EM y T
def plot_xrs_vs_te_simple(days,
                         df_full_valid,
                         df_flares_valid,
                         time_column="PeakTime",
                         XRS="xrsb_corr", T="T_cor", EM="EM_cor_norm",
                         color_by=None,
                         palette="viridis",
                         class_to_color=None,
                         graphics_dir=None):
    """
    Scatter X-ray flux against emission measure and temperature at flare times.

    Flares are matched to the time series with an inner merge that requires
    ``df_flares_valid[time_column]`` to equal ``df_full_valid['date']`` exactly.

    Parameters
    ----------
    days : int or str
        Only used in the plot titles and the output file name.
    df_full_valid : pandas.DataFrame
        Time series with 'date' and the columns named by ``XRS``, ``T``, ``EM``.
    df_flares_valid : pandas.DataFrame
        Flare list containing ``time_column``.
    time_column : str
        Flare time column used for the merge.
    XRS, T, EM : str
        Column names for flux, temperature and emission measure.
    color_by : str or None
        "Class" (first letter of the class), "Date" (calendar date of
        ``time_column``), "observatory", or any other column of the merged
        table. ``None`` draws every point in blue.
    palette : str
        Matplotlib colormap used for labels without an explicit colour.
    class_to_color : dict or None
        Optional label -> colour mapping. Labels missing from it get a colour
        from ``palette``. The dictionary itself is not modified.
    graphics_dir : str or None
        Output folder. ``None`` falls back to the module-level ``graphics_dir``
        variable (the original behaviour), or the current directory if that
        variable does not exist. The folder is created when missing.

    Returns
    -------
    None
        The figure is saved, shown and closed. Nothing is plotted when the
        merge is empty or every matched row has NaN in ``XRS``/``T``/``EM``.

    Notes
    -----
    The input DataFrames are not modified. ``plt.rcParams`` is updated
    globally, so font sizes persist after the call.
    """
    # Parse dates on copies so the caller's frames are left untouched.
    series_table = df_full_valid.assign(date=pd.to_datetime(df_full_valid['date']))
    flare_table = df_flares_valid.assign(
        **{time_column: pd.to_datetime(df_flares_valid[time_column])}
    )

    # Exact-timestamp match between flare times and samples.
    df_merged = pd.merge(flare_table, series_table,
                         left_on=time_column,
                         right_on='date',
                         how='inner')

    if len(df_merged) == 0:
        print("No hay coincidencias entre los dataframes.")
        return

    # Keep only rows where all three plotted quantities are present.
    valid_data = df_merged.dropna(subset=[XRS, T, EM])

    if len(valid_data) == 0:
        print("No hay datos válidos después de filtrar NaNs.")
        return

    print(f"Datos válidos para graficar: {len(valid_data)}")

    xrs_vals = valid_data[XRS].values
    em_vals = valid_data[EM].values
    t_vals = valid_data[T].values

    # One label per point, used to choose its colour.
    if color_by == "Class":
        flare_labels = valid_data["Class"].astype(str).str[0].tolist()  # letter only: B, C, M, X
    elif color_by == "Date":
        flare_labels = valid_data[time_column].dt.date.astype(str).tolist()
    elif color_by == "observatory":
        flare_labels = valid_data["observatory"].fillna("Unknown").tolist()
    elif color_by and color_by in valid_data.columns:
        # Any other column of the merged table can drive the colouring.
        flare_labels = valid_data[color_by].fillna("Unknown").astype(str).tolist()
    else:
        flare_labels = [""] * len(valid_data)

    # Resolve a colour for every label.
    if color_by:
        labels = sorted(set(flare_labels))
        color_map = dict(class_to_color) if class_to_color else {}
        missing = [lab for lab in labels if lab not in color_map]
        if missing:
            cmap = plt.get_cmap(palette, len(missing))
            color_map.update({lab: cmap(i) for i, lab in enumerate(missing)})
        colors = [color_map[lab] for lab in flare_labels]
    else:
        labels = []
        color_map = {}
        colors = "blue"

    plt.rcParams.update({
        "axes.titlesize": 17, "axes.labelsize": 15,
        "xtick.labelsize": 16, "ytick.labelsize": 16,
        "legend.fontsize": 16
    })

    fig, axes = plt.subplots(1, 2, figsize=(15, 5), constrained_layout=True)

    # XRS vs EM (log-log)
    axes[0].scatter(em_vals, xrs_vals, c=colors, alpha=0.7, s=50)
    axes[0].set_xlabel(r"EM$_{49}$ [cm$^{-3}$]")
    axes[0].set_ylabel(rf"{XRS} [W/m$^2$]")
    axes[0].set_title(f"X-ray flux vs Emission Measure\n{days} days ({time_column})", pad=20)
    axes[0].set_xscale("log")
    axes[0].set_yscale("log")
    axes[0].set_xlim(0.001, None)

    # XRS vs T (log flux, linear temperature)
    axes[1].scatter(t_vals, xrs_vals, c=colors, alpha=0.7, s=50)
    axes[1].set_xlabel("T [MK]")
    axes[1].set_ylabel(rf"{XRS} [W/m$^2$]")
    axes[1].set_title(f"X-ray flux vs Temperature\n{days} days ({time_column})", pad=20)
    axes[1].set_xscale("linear")
    axes[1].set_yscale("log")
    axes[1].set_xlim(0, 30)

    # Shared legend below both panels, at most four entries per row.
    if color_by and labels:
        handles = [plt.Line2D([], [], marker="o", color=color_map[lab], linestyle="",
                              label=str(lab), markersize=12) for lab in labels]
        fig.legend(handles=handles, title=color_by,
                   loc="upper center", bbox_to_anchor=(0.5, -0.05),
                   ncol=min(4, len(labels)), frameon=False,
                   title_fontsize=17)

    # Resolve the output folder: explicit argument first, then the notebook-level variable.
    if graphics_dir is None:
        graphics_dir = globals().get("graphics_dir") or "."
    os.makedirs(graphics_dir, exist_ok=True)

    output_name = f"{days}d_{XRS}_vs_{T}_{EM}_{time_column}_{color_by}.png"
    output_path = os.path.join(graphics_dir, output_name)
    fig.savefig(output_path, dpi=300, bbox_inches="tight")
    plt.show()
    plt.close(fig)  # avoid accumulating open figures when called in a loop


##

## Carpeta para gráficas

In [43]:
# Analysis parameters. Several of them are interpolated into the output
# folder name below, so changing one points the notebook at a different folder.
n = 185
window_minutes = 30
fai_temp_range = (7, 14)
fai_em_threshold = 0.005
date_column = "date"
duration = True
FAI_duration = 3
filter_flare_coincidence = True
method = "filtered"  # "all", "true" or "filtered"

# Date prefix of the analysis folder. It is pinned to a fixed run; to use
# today's date instead: fecha_actual = datetime.now().strftime("%Y-%m-%d")
fecha_actual = "2025-11-04"

output_dir = (
    f"{fecha_actual}_Analysis_for_{n}_days/"
    f"GraphicsFAI(W_{window_minutes})_T{fai_temp_range[0]}-{fai_temp_range[1]}"
    f"_EM{fai_em_threshold}_dur{duration}_{FAI_duration}"
)

folder = f"Graphics_{method}"

# Sub-folder where the figures are saved; create it on first use and report
# whether it was already there.
graphics_dir = os.path.join(output_dir, folder)
if os.path.exists(graphics_dir):
    print(f"⚠️ La carpeta de análisis ya existía: {graphics_dir}")
else:
    os.makedirs(graphics_dir, exist_ok=True)
    print(f"📁 Carpeta de análisis creada: {graphics_dir}")



‚ö†Ô∏è La carpeta de an√°lisis ya exist√≠a: 2025-11-04_Analysis_for_185_days/GraphicsFAI(W_30)_T7-14_EM0.005_durTrue_3/Graphics_filtered


## llamar datos

In [44]:
# Folder of this analysis run and the sub-folder holding the FAI tables.
analysis_root = f"{fecha_actual}_Analysis_for_{n}_days"
fai_root = f"{analysis_root}/Analysis_FAI"

# File-name fragments shared by the FAI tables.
fai_tag = f"T{fai_temp_range[0]}-{fai_temp_range[1]}_EM{fai_em_threshold}_dur{FAI_duration}min"
window_tag = f"(W_{window_minutes})"

# Cleaned time series and valid flares
csv_path_full_valid = f"{analysis_root}/df_full_{n}_valid.csv"
csv_path_flares = f"{analysis_root}/df_flares_{n}_valid.csv"
# Full table with the FAI columns
csv_path_full = f"{fai_root}/df_full_fai_{fai_tag}.csv"

# Anticipation times per flare
csv_path_anticipation_time = f"{fai_root}/df_anticipation_time_{method}_{window_tag}_{fai_tag}.csv"
# All FAI candidates and the FAIs selected by `method`
csv_path_fai_all = f"{fai_root}/df_fai_all_{fai_tag}.csv"
csv_path_fai_selected = f"{fai_root}/df_fai_{method}_{fai_tag}.csv"
# FAI-to-flare association tables (two variants)
csv_path_fai_assoc = f"{fai_root}/df_fai_assoc_{method}_{window_tag}_{fai_tag}.csv"
csv_path_fai_assoc2 = f"{fai_root}/df_fai_assoc2_{method}_{window_tag}_{fai_tag}.csv"

# Load every table; date columns are still plain strings at this point.
df_full_valid = pd.read_csv(csv_path_full_valid)
df_flares_valid = pd.read_csv(csv_path_flares)
df_anticipation_time = pd.read_csv(csv_path_anticipation_time)
df_fai_all = pd.read_csv(csv_path_fai_all)
df_fai_selected = pd.read_csv(csv_path_fai_selected)
df_full = pd.read_csv(csv_path_full)
df_fai_assoc = pd.read_csv(csv_path_fai_assoc)
df_fai_assoc2 = pd.read_csv(csv_path_fai_assoc2)

## Info dfs

In [45]:
# Convert the time columns of each table from strings to datetimes, in place.
flare_time_columns = ("StartTime", "PeakTime", "EndTime")
for frame, time_columns in (
    (df_full_valid, ("date",)),
    (df_flares_valid, flare_time_columns),
    (df_anticipation_time, flare_time_columns),
    (df_fai_selected, ("date",)),
):
    for column in time_columns:
        frame[column] = pd.to_datetime(frame[column])

In [46]:
# Schema check of the cleaned time series: column names, dtypes and non-null counts.
df_full_valid.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 64604 entries, 0 to 64603
Data columns (total 13 columns):
 #   Column        Non-Null Count  Dtype         
---  ------        --------------  -----         
 0   GOES_ID       64604 non-null  object        
 1   date          64604 non-null  datetime64[ns]
 2   observatory   64604 non-null  object        
 3   xrsa          64604 non-null  float64       
 4   xrsb          64604 non-null  float64       
 5   xrsa_corr     64604 non-null  float64       
 6   xrsb_corr     64604 non-null  float64       
 7   T_cor         64604 non-null  float64       
 8   EM_cor        64604 non-null  float64       
 9   T_phot        64604 non-null  float64       
 10  EM_phot       64604 non-null  float64       
 11  EM_cor_norm   64604 non-null  float64       
 12  EM_phot_norm  64604 non-null  float64       
dtypes: datetime64[ns](1), float64(10), object(2)
memory usage: 6.4+ MB


In [47]:
# Schema check of the valid-flare table: column names, dtypes and non-null counts.
df_flares_valid.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2020 entries, 0 to 2019
Data columns (total 12 columns):
 #   Column       Non-Null Count  Dtype         
---  ------       --------------  -----         
 0   Flare_ID     2020 non-null   object        
 1   StartTime    2020 non-null   datetime64[ns]
 2   PeakTime     2020 non-null   datetime64[ns]
 3   EndTime      2020 non-null   datetime64[ns]
 4   Class        2020 non-null   object        
 5   ClassLetter  2020 non-null   object        
 6   ClassNumber  2020 non-null   float64       
 7   ClassGroup   2020 non-null   object        
 8   Observatory  2020 non-null   object        
 9   StartPeak    2020 non-null   float64       
 10  PeakEnd      2020 non-null   float64       
 11  StartEnd     2020 non-null   float64       
dtypes: datetime64[ns](3), float64(4), object(5)
memory usage: 189.5+ KB


In [48]:
# Schema check of the anticipation-time table: column names, dtypes and non-null counts.
df_anticipation_time.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2020 entries, 0 to 2019
Data columns (total 25 columns):
 #   Column                      Non-Null Count  Dtype         
---  ------                      --------------  -----         
 0   Flare_ID                    2020 non-null   object        
 1   StartTime                   2020 non-null   datetime64[ns]
 2   PeakTime                    2020 non-null   datetime64[ns]
 3   EndTime                     2020 non-null   datetime64[ns]
 4   Class                       2020 non-null   object        
 5   ClassLetter                 2020 non-null   object        
 6   ClassNumber                 2020 non-null   float64       
 7   ClassGroup                  2020 non-null   object        
 8   Observatory                 2020 non-null   object        
 9   StartPeak                   2020 non-null   float64       
 10  PeakEnd                     2020 non-null   float64       
 11  StartEnd                    2020 non-null   float64     

In [49]:
# Schema check of the selected-FAI table: column names, dtypes and non-null counts.
df_fai_selected.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 17308 entries, 0 to 17307
Data columns (total 18 columns):
 #   Column               Non-Null Count  Dtype         
---  ------               --------------  -----         
 0   GOES_ID              17308 non-null  object        
 1   date                 17308 non-null  datetime64[ns]
 2   observatory          17308 non-null  object        
 3   xrsa                 17308 non-null  float64       
 4   xrsb                 17308 non-null  float64       
 5   xrsa_corr            17308 non-null  float64       
 6   xrsb_corr            17308 non-null  float64       
 7   T_cor                17308 non-null  float64       
 8   EM_cor               17308 non-null  float64       
 9   T_phot               17308 non-null  float64       
 10  EM_phot              17308 non-null  float64       
 11  EM_cor_norm          17308 non-null  float64       
 12  EM_phot_norm         17308 non-null  float64       
 13  FAI_alert            17308 non-

In [50]:
# Schema check of the FAI-flare association table.
# NOTE(review): 'date' is reported as object here; this table was not passed
# through pd.to_datetime in the conversion cell above.
df_fai_assoc.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 17308 entries, 0 to 17307
Data columns (total 37 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   GOES_ID              17308 non-null  object 
 1   date                 17308 non-null  object 
 2   observatory          17308 non-null  object 
 3   xrsa                 17308 non-null  float64
 4   xrsb                 17308 non-null  float64
 5   xrsa_corr            17308 non-null  float64
 6   xrsb_corr            17308 non-null  float64
 7   T_cor                17308 non-null  float64
 8   EM_cor               17308 non-null  float64
 9   T_phot               17308 non-null  float64
 10  EM_phot              17308 non-null  float64
 11  EM_cor_norm          17308 non-null  float64
 12  EM_phot_norm         17308 non-null  float64
 13  FAI_alert            17308 non-null  bool   
 14  FAI_true             17308 non-null  bool   
 15  delta_min            17308 non-null 

## Búsqueda individual

In [51]:
# Show every column when displaying wide tables, then preview the first three rows.
pd.set_option("display.max_columns", None)
df_fai_assoc.head(3)

Unnamed: 0,GOES_ID,date,observatory,xrsa,xrsb,xrsa_corr,xrsb_corr,T_cor,EM_cor,T_phot,EM_phot,EM_cor_norm,EM_phot_norm,FAI_alert,FAI_true,delta_min,group_id,duration_from_start,Time_FAI,Associated_Flare,Flare_ID,F_StartTime,F_PeakTime,F_EndTime,F_Class,F_ClassLetter,F_ClassNumber,F_ClassGroup,F_Observatory,F_StartPeak,F_PeakEnd,F_StartEnd,Association_Type,Time_to_flare,FAI_to_start,FAI_to_peak,FAI_to_end
0,GOES_10,2000-06-06 00:17:00,GOES-16,4.194724e-08,1e-06,2.585677e-08,2.906549e-07,11.058271,1.937464e+47,9.953444,5.260247e+47,0.019375,0.052602,True,True,1.0,3,2.0,2000-06-06 00:17:00,True,Flare_20000606_0,2000-06-06 00:28:00,2000-06-06 00:43:00,2000-06-06 00:55:00,C4.6,C,4.6,C1-4.9,GOES,15.0,12.0,27.0,PeakTime,26.0,11.0,26.0,38.0
1,GOES_13,2000-06-06 00:25:00,GOES-16,4.753495e-08,1e-06,8.936986e-09,1.561961e-07,9.086959,1.344306e+47,8.010274,4.128245e+47,0.013443,0.041282,True,True,1.0,4,2.0,2000-06-06 00:25:00,True,Flare_20000606_0,2000-06-06 00:28:00,2000-06-06 00:43:00,2000-06-06 00:55:00,C4.6,C,4.6,C1-4.9,GOES,15.0,12.0,27.0,PeakTime,18.0,3.0,18.0,30.0
2,GOES_14,2000-06-06 00:26:00,GOES-16,5.612609e-08,1e-06,1.455322e-08,2.01082e-07,10.094641,1.496619e+47,8.974755,4.320921e+47,0.014966,0.043209,True,True,1.0,4,3.0,2000-06-06 00:26:00,True,Flare_20000606_0,2000-06-06 00:28:00,2000-06-06 00:43:00,2000-06-06 00:55:00,C4.6,C,4.6,C1-4.9,GOES,15.0,12.0,27.0,PeakTime,17.0,2.0,17.0,29.0


In [52]:
# The two flares with the highest FAIalerts_W value.
df_anticipation_time.sort_values("FAIalerts_W", ascending=False).head(2)

Unnamed: 0,Flare_ID,StartTime,PeakTime,EndTime,Class,ClassLetter,ClassNumber,ClassGroup,Observatory,StartPeak,PeakEnd,StartEnd,FAIalerts_W,FAIalerts_WStart,FAIalerts_WPeak,FAIalerts_StartPeak,FAIalerts_PeakEnd,FAIalerts_startEnd,AnticipationStart,AnticipationPeak,Peak_to_lastFAI,Time_since_prev_flare_end,Time_since_prev_flare_peak,RelAnticipation_Peak,RelAnticipation_Start
874,Flare_20131028_874,2013-10-28 11:32:00,2013-10-28 11:53:00,2013-10-28 12:39:00,M1.4,M,1.4,M1-4.9,GOES,21.0,46.0,67.0,29,8,29,21,0,21,8.0,29.0,1.0,147.0,151.0,1.380952,0.380952
1025,Flare_20141026_1025,2014-10-26 17:08:00,2014-10-26 17:17:00,2014-10-26 17:30:00,M1.0,M,1.0,M1-4.9,GOES,9.0,13.0,22.0,28,19,28,9,0,9,19.0,28.0,1.0,55.0,60.0,3.111111,2.111111


In [53]:
# All FAI rows associated with one flare, looked up by its Flare_ID.
flare_id = "Flare_20000606_0"
df_fai_assoc.loc[df_fai_assoc["Flare_ID"].eq(flare_id)]


Unnamed: 0,GOES_ID,date,observatory,xrsa,xrsb,xrsa_corr,xrsb_corr,T_cor,EM_cor,T_phot,EM_phot,EM_cor_norm,EM_phot_norm,FAI_alert,FAI_true,delta_min,group_id,duration_from_start,Time_FAI,Associated_Flare,Flare_ID,F_StartTime,F_PeakTime,F_EndTime,F_Class,F_ClassLetter,F_ClassNumber,F_ClassGroup,F_Observatory,F_StartPeak,F_PeakEnd,F_StartEnd,Association_Type,Time_to_flare,FAI_to_start,FAI_to_peak,FAI_to_end
0,GOES_10,2000-06-06 00:17:00,GOES-16,4.194724e-08,1e-06,2.585677e-08,2.906549e-07,11.058271,1.937464e+47,9.953444,5.260247e+47,0.019375,0.052602,True,True,1.0,3,2.0,2000-06-06 00:17:00,True,Flare_20000606_0,2000-06-06 00:28:00,2000-06-06 00:43:00,2000-06-06 00:55:00,C4.6,C,4.6,C1-4.9,GOES,15.0,12.0,27.0,PeakTime,26.0,11.0,26.0,38.0
1,GOES_13,2000-06-06 00:25:00,GOES-16,4.753495e-08,1e-06,8.936986e-09,1.561961e-07,9.086959,1.344306e+47,8.010274,4.128245e+47,0.013443,0.041282,True,True,1.0,4,2.0,2000-06-06 00:25:00,True,Flare_20000606_0,2000-06-06 00:28:00,2000-06-06 00:43:00,2000-06-06 00:55:00,C4.6,C,4.6,C1-4.9,GOES,15.0,12.0,27.0,PeakTime,18.0,3.0,18.0,30.0
2,GOES_14,2000-06-06 00:26:00,GOES-16,5.612609e-08,1e-06,1.455322e-08,2.01082e-07,10.094641,1.496619e+47,8.974755,4.320921e+47,0.014966,0.043209,True,True,1.0,4,3.0,2000-06-06 00:26:00,True,Flare_20000606_0,2000-06-06 00:28:00,2000-06-06 00:43:00,2000-06-06 00:55:00,C4.6,C,4.6,C1-4.9,GOES,15.0,12.0,27.0,PeakTime,17.0,2.0,17.0,29.0
3,GOES_15,2000-06-06 00:27:00,GOES-16,6.899128e-08,2e-06,2.759433e-08,3.362277e-07,10.674135,2.33536e+47,9.55528,6.500427e+47,0.023354,0.065004,True,True,1.0,4,4.0,2000-06-06 00:27:00,True,Flare_20000606_0,2000-06-06 00:28:00,2000-06-06 00:43:00,2000-06-06 00:55:00,C4.6,C,4.6,C1-4.9,GOES,15.0,12.0,27.0,PeakTime,16.0,1.0,16.0,28.0
4,GOES_16,2000-06-06 00:28:00,GOES-16,8.993616e-08,2e-06,4.93028e-08,4.731387e-07,11.832619,2.934333e+47,10.791647,7.563609e+47,0.029343,0.075636,True,True,1.0,4,5.0,2000-06-06 00:28:00,True,Flare_20000606_0,2000-06-06 00:28:00,2000-06-06 00:43:00,2000-06-06 00:55:00,C4.6,C,4.6,C1-4.9,GOES,15.0,12.0,27.0,PeakTime,15.0,0.0,15.0,27.0
5,GOES_17,2000-06-06 00:29:00,GOES-16,1.181016e-07,2e-06,7.489061e-08,6.372383e-07,12.440628,3.766149e+47,11.479448,9.318636e+47,0.037661,0.093186,True,True,1.0,4,6.0,2000-06-06 00:29:00,True,Flare_20000606_0,2000-06-06 00:28:00,2000-06-06 00:43:00,2000-06-06 00:55:00,C4.6,C,4.6,C1-4.9,GOES,15.0,12.0,27.0,PeakTime,14.0,-1.0,14.0,26.0
6,GOES_18,2000-06-06 00:30:00,GOES-16,1.437521e-07,2e-06,9.621716e-08,7.893004e-07,12.629187,4.602123e+47,11.696755,1.1246270000000001e+48,0.046021,0.112463,True,True,1.0,4,7.0,2000-06-06 00:30:00,True,Flare_20000606_0,2000-06-06 00:28:00,2000-06-06 00:43:00,2000-06-06 00:55:00,C4.6,C,4.6,C1-4.9,GOES,15.0,12.0,27.0,PeakTime,13.0,-2.0,13.0,25.0
7,GOES_19,2000-06-06 00:31:00,GOES-16,1.849581e-07,2e-06,1.28832e-07,9.9934e-07,12.921346,5.713299e+47,12.036984,1.369641e+48,0.057133,0.136964,True,True,1.0,4,8.0,2000-06-06 00:31:00,True,Flare_20000606_0,2000-06-06 00:28:00,2000-06-06 00:43:00,2000-06-06 00:55:00,C4.6,C,4.6,C1-4.9,GOES,15.0,12.0,27.0,PeakTime,12.0,-3.0,12.0,24.0
8,GOES_20,2000-06-06 00:32:00,GOES-16,2.273288e-07,3e-06,1.583375e-07,1.020797e-06,13.933207,5.502505e+47,13.236319,1.239591e+48,0.055025,0.123959,True,True,1.0,4,9.0,2000-06-06 00:32:00,True,Flare_20000606_0,2000-06-06 00:28:00,2000-06-06 00:43:00,2000-06-06 00:55:00,C4.6,C,4.6,C1-4.9,GOES,15.0,12.0,27.0,PeakTime,11.0,-4.0,11.0,23.0
9,GOES_21,2000-06-06 00:33:00,GOES-16,2.666638e-07,3e-06,1.767276e-07,1.222432e-06,13.53838,6.732215e+47,12.765542,1.5522730000000002e+48,0.067322,0.155227,True,True,1.0,4,10.0,2000-06-06 00:33:00,True,Flare_20000606_0,2000-06-06 00:28:00,2000-06-06 00:43:00,2000-06-06 00:55:00,C4.6,C,4.6,C1-4.9,GOES,15.0,12.0,27.0,PeakTime,10.0,-5.0,10.0,22.0


In [54]:
# Same lookup for another flare.
# NOTE(review): this id does not follow the "Flare_YYYYMMDD_N" pattern of the
# Flare_ID values shown above, and the saved output of this cell is empty.
# Confirm the intended Flare_ID.
flare_id = "M5.3_2021-10-28T15:35:00"
df_fai_assoc.loc[df_fai_assoc["Flare_ID"].eq(flare_id)]


Unnamed: 0,GOES_ID,date,observatory,xrsa,xrsb,xrsa_corr,xrsb_corr,T_cor,EM_cor,T_phot,EM_phot,EM_cor_norm,EM_phot_norm,FAI_alert,FAI_true,delta_min,group_id,duration_from_start,Time_FAI,Associated_Flare,Flare_ID,F_StartTime,F_PeakTime,F_EndTime,F_Class,F_ClassLetter,F_ClassNumber,F_ClassGroup,F_Observatory,F_StartPeak,F_PeakEnd,F_StartEnd,Association_Type,Time_to_flare,FAI_to_start,FAI_to_peak,FAI_to_end


In [55]:
# Keep only the FAI rows that were matched to a flare.
df_assoc_true = df_fai_assoc.loc[df_fai_assoc["Associated_Flare"].eq(True)].copy()

# Order by FAI_to_peak from smallest to largest (ascending) and renumber the rows.
df_assoc_sorted = (
    df_assoc_true
    .sort_values("FAI_to_peak", ascending=True)
    .reset_index(drop=True)
)

# Preview the first five rows.
df_assoc_sorted.head()

Unnamed: 0,GOES_ID,date,observatory,xrsa,xrsb,xrsa_corr,xrsb_corr,T_cor,EM_cor,T_phot,EM_phot,EM_cor_norm,EM_phot_norm,FAI_alert,FAI_true,delta_min,group_id,duration_from_start,Time_FAI,Associated_Flare,Flare_ID,F_StartTime,F_PeakTime,F_EndTime,F_Class,F_ClassLetter,F_ClassNumber,F_ClassGroup,F_Observatory,F_StartPeak,F_PeakEnd,F_StartEnd,Association_Type,Time_to_flare,FAI_to_start,FAI_to_peak,FAI_to_end
0,GOES_36615,2014-12-20 15:05:00,GOES-13,3.218181e-07,4e-06,3.059163e-07,2.448802e-06,11.618367,1.1724560000000001e+48,10.618748,3.004906e+48,0.117246,0.300491,True,True,1.0,12000,2.0,2014-12-20 15:05:00,True,Flare_20141220_1055,2014-12-20 15:01:00,2014-12-20 15:06:00,2014-12-20 15:11:00,C2.9,C,2.9,C1-4.9,GOES,5.0,5.0,10.0,PeakTime,1.0,-4.0,1.0,6.0
1,GOES_854,2000-06-18 17:02:00,GOES-16,5.782891e-08,1e-06,5.012315e-08,4.903518e-07,11.737047,3.066106e+47,10.685563,7.955073000000001e+47,0.030661,0.079551,True,True,1.0,262,2.0,2000-06-18 17:02:00,True,Flare_20000618_19,2000-06-18 16:59:00,2000-06-18 17:03:00,2000-06-18 17:06:00,C1.4,C,1.4,C1-4.9,GOES,4.0,3.0,7.0,PeakTime,1.0,-3.0,1.0,4.0
2,GOES_64341,2025-06-19 06:50:00,GOES-18,7.394329e-07,6e-06,6.715486e-07,3.943641e-06,12.729819,1.625035e+48,11.824382,3.886756e+48,0.162503,0.388676,True,True,1.0,20638,7.0,2025-06-19 06:50:00,True,Flare_20250619_2014,2025-06-19 06:39:00,2025-06-19 06:51:00,2025-06-19 06:54:00,C5.9,C,5.9,C5-9.9,GOES,12.0,3.0,15.0,PeakTime,1.0,-11.0,1.0,4.0
3,GOES_39042,2021-10-28 21:06:00,GOES-16,8.088023e-08,1e-06,4.094409e-08,2.953017e-07,11.59133,1.333445e+47,10.552547,3.438826e+47,0.013334,0.034388,True,True,1.0,12840,2.0,2021-10-28 21:06:00,True,Flare_20211028_1153,2021-10-28 21:02:00,2021-10-28 21:07:00,2021-10-28 21:11:00,C1.5,C,1.5,C1-4.9,GOES,5.0,4.0,9.0,PeakTime,1.0,-4.0,1.0,5.0
4,GOES_39031,2021-10-28 19:44:00,GOES-16,3.160938e-07,3e-06,1.306227e-07,9.315106e-07,11.651301,4.182696e+47,10.61694,1.074496e+48,0.041827,0.10745,True,True,1.0,12835,3.0,2021-10-28 19:44:00,True,Flare_20211028_1152,2021-10-28 19:34:00,2021-10-28 19:45:00,2021-10-28 20:03:00,C3.1,C,3.1,C1-4.9,GOES,11.0,18.0,29.0,PeakTime,1.0,-10.0,1.0,19.0


In [56]:
# Distinct Association_Type labels present in df_fai_assoc (unique() keeps NaN)
df_fai_assoc["Association_Type"].unique()


array(['PeakTime', nan], dtype=object)

In [57]:
# Distinct Association_Type labels present in df_fai_assoc2 (unique() keeps NaN)
df_fai_assoc2["Association_Type"].unique()


array(['PeakTime', nan, 'Inside'], dtype=object)

In [58]:
# Number of rows per Association_Type label in df_fai_assoc
# (value_counts() leaves NaN out by default)
df_fai_assoc["Association_Type"].value_counts()


Association_Type
PeakTime    7355
Name: count, dtype: int64

In [59]:
# Number of rows per Association_Type label in df_fai_assoc2
# (value_counts() leaves NaN out by default)
df_fai_assoc2["Association_Type"].value_counts()


Association_Type
PeakTime    7355
Inside       464
Name: count, dtype: int64

In [60]:
# Probe for rows labelled "EndTime" in df_fai_assoc; the unique() output
# above lists only 'PeakTime' and NaN, so an empty frame is expected here
df_fai_assoc[df_fai_assoc["Association_Type"] == "EndTime"].head()


Unnamed: 0,GOES_ID,date,observatory,xrsa,xrsb,xrsa_corr,xrsb_corr,T_cor,EM_cor,T_phot,EM_phot,EM_cor_norm,EM_phot_norm,FAI_alert,FAI_true,delta_min,group_id,duration_from_start,Time_FAI,Associated_Flare,Flare_ID,F_StartTime,F_PeakTime,F_EndTime,F_Class,F_ClassLetter,F_ClassNumber,F_ClassGroup,F_Observatory,F_StartPeak,F_PeakEnd,F_StartEnd,Association_Type,Time_to_flare,FAI_to_start,FAI_to_peak,FAI_to_end


In [61]:
# First four df_fai_assoc2 rows whose Association_Type is "Inside"
df_fai_assoc2[df_fai_assoc2["Association_Type"] == "Inside"].head(4)


Unnamed: 0,GOES_ID,date,observatory,xrsa,xrsb,xrsa_corr,xrsb_corr,T_cor,EM_cor,T_phot,EM_phot,EM_cor_norm,EM_phot_norm,FAI_alert,FAI_true,delta_min,group_id,duration_from_start,Time_FAI,Associated_Flare,Flare_ID,F_StartTime,F_PeakTime,F_EndTime,F_Class,F_ClassLetter,F_ClassNumber,F_ClassGroup,F_Observatory,F_StartPeak,F_PeakEnd,F_StartEnd,Association_Type,Time_to_flare,FAI_to_start,FAI_to_peak,FAI_to_end
461,GOES_1655,2000-07-12 16:18:00,GOES-16,1.289071e-06,1e-05,1.166818e-07,1.402473e-06,10.738517,9.67183e+47,9.621205,2.681055e+48,0.096718,0.268105,True,True,1.0,501,2.0,2000-07-12 16:18:00,True,Flare_20000712_33,2000-07-12 16:06:00,2000-07-12 16:52:00,2000-07-12 18:06:00,M1,M,1.0,M1-4.9,GOES,46.0,74.0,120.0,Inside,0.0,-12.0,34.0,108.0
462,GOES_1656,2000-07-12 16:19:00,GOES-16,1.286965e-06,1e-05,4.868616e-08,1.227928e-06,7.741622,1.3747330000000001e+48,6.770846,4.596914e+48,0.137473,0.459691,True,True,1.0,501,3.0,2000-07-12 16:19:00,True,Flare_20000712_33,2000-07-12 16:06:00,2000-07-12 16:52:00,2000-07-12 18:06:00,M1,M,1.0,M1-4.9,GOES,46.0,74.0,120.0,Inside,0.0,-13.0,33.0,107.0
671,GOES_2424,2000-09-30 17:42:00,GOES-16,2.018862e-07,3e-06,1.071299e-07,8.141958e-07,13.029302,4.622749e+47,12.163681,1.100442e+48,0.046227,0.110044,True,True,1.0,761,2.0,2000-09-30 17:42:00,True,Flare_20000930_51,2000-09-30 17:38:00,2000-09-30 18:27:00,2000-09-30 19:05:00,M1.0,M,1.0,M1-4.9,GOES,49.0,38.0,87.0,Inside,0.0,-4.0,45.0,83.0
672,GOES_2425,2000-09-30 17:43:00,GOES-16,2.269718e-07,3e-06,1.161056e-07,8.260822e-07,13.383756,4.59015e+47,12.582159,1.0683890000000001e+48,0.045902,0.106839,True,True,1.0,761,3.0,2000-09-30 17:43:00,True,Flare_20000930_51,2000-09-30 17:38:00,2000-09-30 18:27:00,2000-09-30 19:05:00,M1.0,M,1.0,M1-4.9,GOES,49.0,38.0,87.0,Inside,0.0,-5.0,44.0,82.0


In [62]:
# "Inside" associations from df_fai_assoc2, ordered by descending Time_to_flare;
# only the first four rows are displayed
(
    df_fai_assoc2.loc[df_fai_assoc2["Association_Type"].eq("Inside")]
    .sort_values("Time_to_flare", ascending=False)
    .head(4)
)


Unnamed: 0,GOES_ID,date,observatory,xrsa,xrsb,xrsa_corr,xrsb_corr,T_cor,EM_cor,T_phot,EM_phot,EM_cor_norm,EM_phot_norm,FAI_alert,FAI_true,delta_min,group_id,duration_from_start,Time_FAI,Associated_Flare,Flare_ID,F_StartTime,F_PeakTime,F_EndTime,F_Class,F_ClassLetter,F_ClassNumber,F_ClassGroup,F_Observatory,F_StartPeak,F_PeakEnd,F_StartEnd,Association_Type,Time_to_flare,FAI_to_start,FAI_to_peak,FAI_to_end
461,GOES_1655,2000-07-12 16:18:00,GOES-16,1.289071e-06,1e-05,1.166818e-07,1.402473e-06,10.738517,9.67183e+47,9.621205,2.681055e+48,0.096718,0.268105,True,True,1.0,501,2.0,2000-07-12 16:18:00,True,Flare_20000712_33,2000-07-12 16:06:00,2000-07-12 16:52:00,2000-07-12 18:06:00,M1,M,1.0,M1-4.9,GOES,46.0,74.0,120.0,Inside,0.0,-12.0,34.0,108.0
462,GOES_1656,2000-07-12 16:19:00,GOES-16,1.286965e-06,1e-05,4.868616e-08,1.227928e-06,7.741622,1.3747330000000001e+48,6.770846,4.596914e+48,0.137473,0.459691,True,True,1.0,501,3.0,2000-07-12 16:19:00,True,Flare_20000712_33,2000-07-12 16:06:00,2000-07-12 16:52:00,2000-07-12 18:06:00,M1,M,1.0,M1-4.9,GOES,46.0,74.0,120.0,Inside,0.0,-13.0,33.0,107.0
671,GOES_2424,2000-09-30 17:42:00,GOES-16,2.018862e-07,3e-06,1.071299e-07,8.141958e-07,13.029302,4.622749e+47,12.163681,1.100442e+48,0.046227,0.110044,True,True,1.0,761,2.0,2000-09-30 17:42:00,True,Flare_20000930_51,2000-09-30 17:38:00,2000-09-30 18:27:00,2000-09-30 19:05:00,M1.0,M,1.0,M1-4.9,GOES,49.0,38.0,87.0,Inside,0.0,-4.0,45.0,83.0
672,GOES_2425,2000-09-30 17:43:00,GOES-16,2.269718e-07,3e-06,1.161056e-07,8.260822e-07,13.383756,4.59015e+47,12.582159,1.0683890000000001e+48,0.045902,0.106839,True,True,1.0,761,3.0,2000-09-30 17:43:00,True,Flare_20000930_51,2000-09-30 17:38:00,2000-09-30 18:27:00,2000-09-30 19:05:00,M1.0,M,1.0,M1-4.9,GOES,49.0,38.0,87.0,Inside,0.0,-5.0,44.0,82.0


In [63]:
# Distinct Time_to_flare values in df_fai_assoc, in order of first appearance
df_fai_assoc["Time_to_flare"].unique()


array([26., 18., 17., 16., 15., 14., 13., 12., 11., 10.,  9.,  8.,  7.,
        6.,  5.,  4.,  3.,  2.,  1., nan, 27., 25., 23., 22., 21., 28.,
       24., 29., 19., 30., 20.])

In [64]:
# Same distinct values, sorted ascending (np.sort places NaN last)
np.sort(df_fai_assoc["Time_to_flare"].unique())

array([ 1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10., 11., 12., 13.,
       14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25., 26.,
       27., 28., 29., 30., nan])

In [65]:

# Sorted distinct Time_to_flare values with NaN removed beforehand
np.sort(df_fai_assoc["Time_to_flare"].dropna().unique())

array([ 1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10., 11., 12., 13.,
       14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25., 26.,
       27., 28., 29., 30.])

In [66]:
# Sorted distinct FAI_to_start values in df_fai_assoc (NaN, if any, sorts last)
np.sort(df_fai_assoc["FAI_to_start"].unique())

array([-90., -89., -75., -74., -70., -69., -68., -67., -66., -65., -64.,
       -63., -62., -59., -58., -57., -56., -55., -54., -53., -52., -51.,
       -50., -49., -48., -47., -46., -45., -44., -43., -42., -41., -40.,
       -39., -38., -37., -36., -35., -34., -33., -32., -31., -30., -29.,
       -28., -27., -26., -25., -24., -23., -22., -21., -20., -19., -18.,
       -17., -16., -15., -14., -13., -12., -11., -10.,  -9.,  -8.,  -7.,
        -6.,  -5.,  -4.,  -3.,  -2.,  -1.,   0.,   1.,   2.,   3.,   4.,
         5.,   6.,   7.,   8.,   9.,  10.,  11.,  12.,  13.,  14.,  15.,
        16.,  17.,  18.,  19.,  20.,  21.,  22.,  23.,  24.,  25.,  26.,
        27.,  nan])

In [67]:
# Sorted distinct FAI_to_peak values in df_fai_assoc (NaN, if any, sorts last)
np.sort(df_fai_assoc["FAI_to_peak"].unique())

array([ 1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10., 11., 12., 13.,
       14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25., 26.,
       27., 28., 29., 30., nan])

In [68]:
# Distribution of Time_to_flare among the "Inside" associations of df_fai_assoc2
df_fai_assoc2.loc[
    df_fai_assoc2["Association_Type"] == "Inside", "Time_to_flare"
].value_counts()


Time_to_flare
0.0    464
Name: count, dtype: int64

In [69]:
# Timing columns for the first four "Inside" associations of df_fai_assoc2,
# after ordering by descending Time_to_flare
(
    df_fai_assoc2.loc[df_fai_assoc2["Association_Type"].eq("Inside")]
    .sort_values("Time_to_flare", ascending=False)
    .head(4)[[
        "GOES_ID", "date", "Association_Type", "Time_to_flare",
        "FAI_to_start", "FAI_to_peak", "FAI_to_end",
    ]]
)


Unnamed: 0,GOES_ID,date,Association_Type,Time_to_flare,FAI_to_start,FAI_to_peak,FAI_to_end
461,GOES_1655,2000-07-12 16:18:00,Inside,0.0,-12.0,34.0,108.0
462,GOES_1656,2000-07-12 16:19:00,Inside,0.0,-13.0,33.0,107.0
671,GOES_2424,2000-09-30 17:42:00,Inside,0.0,-4.0,45.0,83.0
672,GOES_2425,2000-09-30 17:43:00,Inside,0.0,-5.0,44.0,82.0


In [70]:
# Associated FAIs of df_fai_assoc with the smallest FAI_to_start values.
# The mask is built from the same frame that is being filtered: taking it from
# df_fai_assoc2 (which also flags "Inside" associations) would select rows by
# another table's association result and rely on implicit index alignment.
(
    df_fai_assoc.loc[df_fai_assoc["Associated_Flare"] == True]
    .sort_values("FAI_to_start", ascending=True)
    .head(4)[[
        "GOES_ID", "date", "Flare_ID", "F_StartTime", "F_EndTime",
        "Association_Type", "Time_to_flare", "FAI_to_start", "FAI_to_peak", "FAI_to_end",
    ]]
)


Unnamed: 0,GOES_ID,date,Flare_ID,F_StartTime,F_EndTime,Association_Type,Time_to_flare,FAI_to_start,FAI_to_peak,FAI_to_end
5430,GOES_26866,2012-03-05 04:00:00,Flare_20120305_784,2012-03-05 02:30:00,2012-03-05 04:43:00,PeakTime,9.0,-90.0,9.0,43.0
5429,GOES_26865,2012-03-05 03:59:00,Flare_20120305_784,2012-03-05 02:30:00,2012-03-05 04:43:00,PeakTime,10.0,-89.0,10.0,44.0
3360,GOES_14858,2003-05-27 06:21:00,Flare_20030527_411,2003-05-27 05:06:00,2003-05-27 07:16:00,PeakTime,5.0,-75.0,5.0,55.0
3359,GOES_14857,2003-05-27 06:20:00,Flare_20030527_411,2003-05-27 05:06:00,2003-05-27 07:16:00,PeakTime,6.0,-74.0,6.0,56.0


In [71]:
# Associated FAIs of df_fai_assoc with the smallest FAI_to_peak values.
# The mask is built from the same frame that is being filtered: taking it from
# df_fai_assoc2 (which also flags "Inside" associations) would select rows by
# another table's association result and rely on implicit index alignment.
(
    df_fai_assoc.loc[df_fai_assoc["Associated_Flare"] == True]
    .sort_values("FAI_to_peak", ascending=True)
    .head(4)[[
        "GOES_ID", "date", "Flare_ID", "F_StartTime", "F_EndTime",
        "Association_Type", "Time_to_flare", "FAI_to_start", "FAI_to_peak", "FAI_to_end",
    ]]
)


Unnamed: 0,GOES_ID,date,Flare_ID,F_StartTime,F_EndTime,Association_Type,Time_to_flare,FAI_to_start,FAI_to_peak,FAI_to_end
8255,GOES_36615,2014-12-20 15:05:00,Flare_20141220_1055,2014-12-20 15:01:00,2014-12-20 15:11:00,PeakTime,1.0,-4.0,1.0,6.0
250,GOES_854,2000-06-18 17:02:00,Flare_20000618_19,2000-06-18 16:59:00,2000-06-18 17:06:00,PeakTime,1.0,-3.0,1.0,4.0
17193,GOES_64341,2025-06-19 06:50:00,Flare_20250619_2014,2025-06-19 06:39:00,2025-06-19 06:54:00,PeakTime,1.0,-11.0,1.0,4.0
8906,GOES_39042,2021-10-28 21:06:00,Flare_20211028_1153,2021-10-28 21:02:00,2021-10-28 21:11:00,PeakTime,1.0,-4.0,1.0,5.0


In [72]:
# Probe for associated FAIs that fall exactly on the flare peak (FAI_to_peak == 0);
# show up to four of them with the timing columns only
(
    df_fai_assoc.loc[
        df_fai_assoc["Associated_Flare"].eq(True) & df_fai_assoc["FAI_to_peak"].eq(0),
        [
            "GOES_ID", "date", "Flare_ID", "F_StartTime", "F_EndTime",
            "Association_Type", "Time_to_flare", "FAI_to_start", "FAI_to_peak", "FAI_to_end",
        ],
    ]
    .head(4)
)


Unnamed: 0,GOES_ID,date,Flare_ID,F_StartTime,F_EndTime,Association_Type,Time_to_flare,FAI_to_start,FAI_to_peak,FAI_to_end


In [73]:
# Probe for "EndTime" associations in df_fai_assoc, identifying columns only
df_fai_assoc.loc[
    df_fai_assoc["Association_Type"] == "EndTime",
    ["GOES_ID", "date", "Association_Type"],
].head()


Unnamed: 0,GOES_ID,date,Association_Type


In [74]:
# Inspect the FAI bookkeeping columns of df_full inside a fixed window
# (2000-06-06 23:00:00 to 23:59:00, both bounds included)
mask = df_full["date"].between("2000-06-06 23:00:00", "2000-06-06 23:59:00")
df_filtered = df_full.loc[
    mask,
    ["GOES_ID", "date", "T_cor", "EM_cor_norm", "FAI_alert", "delta_min", "group_id", "FAI_true"],
]
df_filtered

Unnamed: 0,GOES_ID,date,T_cor,EM_cor_norm,FAI_alert,delta_min,group_id,FAI_true
342,GOES_342,2000-06-06 23:05:00,12.007098,0.001983,False,16.0,101,False
343,GOES_343,2000-06-06 23:06:00,12.843778,0.001516,False,1.0,101,False
344,GOES_344,2000-06-06 23:07:00,12.276761,0.002343,False,1.0,101,False
345,GOES_345,2000-06-06 23:08:00,11.444807,0.003859,False,1.0,101,False
346,GOES_346,2000-06-06 23:09:00,10.846733,0.00719,True,1.0,102,False
347,GOES_347,2000-06-06 23:10:00,11.310656,0.006473,True,1.0,102,False
348,GOES_348,2000-06-06 23:11:00,9.730521,0.010382,True,1.0,102,True
349,GOES_349,2000-06-06 23:12:00,8.498064,0.011667,True,1.0,102,True
350,GOES_350,2000-06-06 23:13:00,7.834959,0.010725,True,1.0,102,True


## df_anticipation_time

In [75]:
# Preview the first rows of df_anticipation_time
df_anticipation_time.head()

Unnamed: 0,Flare_ID,StartTime,PeakTime,EndTime,Class,ClassLetter,ClassNumber,ClassGroup,Observatory,StartPeak,PeakEnd,StartEnd,FAIalerts_W,FAIalerts_WStart,FAIalerts_WPeak,FAIalerts_StartPeak,FAIalerts_PeakEnd,FAIalerts_startEnd,AnticipationStart,AnticipationPeak,Peak_to_lastFAI,Time_since_prev_flare_end,Time_since_prev_flare_peak,RelAnticipation_Peak,RelAnticipation_Start
0,Flare_20000606_0,2000-06-06 00:28:00,2000-06-06 00:43:00,2000-06-06 00:55:00,C4.6,C,4.6,C1-4.9,GOES,15.0,12.0,27.0,14,4,14,10,0,10,11.0,26.0,6.0,,,1.733333,0.733333
1,Flare_20000606_1,2000-06-06 01:30:00,2000-06-06 01:49:00,2000-06-06 02:01:00,C2.4,C,2.4,C1-4.9,GOES,19.0,12.0,31.0,7,0,7,7,0,7,,16.0,1.0,54.0,66.0,0.842105,
2,Flare_20000606_2,2000-06-06 08:06:00,2000-06-06 08:16:00,2000-06-06 08:34:00,C2.8,C,2.8,C1-4.9,GOES,10.0,18.0,28.0,13,4,13,9,0,9,17.0,27.0,2.0,,,2.7,1.7
3,Flare_20000606_3,2000-06-06 08:47:00,2000-06-06 08:51:00,2000-06-06 08:54:00,C2.4,C,2.4,C1-4.9,GOES,4.0,3.0,7.0,0,0,0,0,0,0,,,,17.0,35.0,,
4,Flare_20000606_4,2000-06-06 11:23:00,2000-06-06 11:27:00,2000-06-06 11:29:00,C1.8,C,1.8,C1-4.9,GOES,4.0,2.0,6.0,3,0,3,3,0,3,,3.0,1.0,153.0,156.0,0.75,


In [76]:
# Two rows of df_anticipation_time with the highest FAIalerts_PeakEnd
df_anticipation_time.sort_values(by="FAIalerts_PeakEnd", ascending=False).head(2)

Unnamed: 0,Flare_ID,StartTime,PeakTime,EndTime,Class,ClassLetter,ClassNumber,ClassGroup,Observatory,StartPeak,PeakEnd,StartEnd,FAIalerts_W,FAIalerts_WStart,FAIalerts_WPeak,FAIalerts_StartPeak,FAIalerts_PeakEnd,FAIalerts_startEnd,AnticipationStart,AnticipationPeak,Peak_to_lastFAI,Time_since_prev_flare_end,Time_since_prev_flare_peak,RelAnticipation_Peak,RelAnticipation_Start
0,Flare_20000606_0,2000-06-06 00:28:00,2000-06-06 00:43:00,2000-06-06 00:55:00,C4.6,C,4.6,C1-4.9,GOES,15.0,12.0,27.0,14,4,14,10,0,10,11.0,26.0,6.0,,,1.733333,0.733333
1,Flare_20000606_1,2000-06-06 01:30:00,2000-06-06 01:49:00,2000-06-06 02:01:00,C2.4,C,2.4,C1-4.9,GOES,19.0,12.0,31.0,7,0,7,7,0,7,,16.0,1.0,54.0,66.0,0.842105,


In [77]:
# Number of rows in df_anticipation_time per FAIalerts_WPeak value, most frequent first
df_anticipation_time["FAIalerts_WPeak"].value_counts()

FAIalerts_WPeak
0     710
1     202
2     169
3     148
4     144
5     131
6      95
7      90
8      69
9      53
10     41
11     37
13     23
12     23
14     21
16     12
15     11
17      7
22      5
18      5
19      4
21      4
24      4
20      3
26      2
25      2
28      2
23      2
29      1
Name: count, dtype: int64

In [78]:
# Preview the first rows of df_fai_assoc2
df_fai_assoc2.head()

Unnamed: 0,GOES_ID,date,observatory,xrsa,xrsb,xrsa_corr,xrsb_corr,T_cor,EM_cor,T_phot,EM_phot,EM_cor_norm,EM_phot_norm,FAI_alert,FAI_true,delta_min,group_id,duration_from_start,Time_FAI,Associated_Flare,Flare_ID,F_StartTime,F_PeakTime,F_EndTime,F_Class,F_ClassLetter,F_ClassNumber,F_ClassGroup,F_Observatory,F_StartPeak,F_PeakEnd,F_StartEnd,Association_Type,Time_to_flare,FAI_to_start,FAI_to_peak,FAI_to_end
0,GOES_10,2000-06-06 00:17:00,GOES-16,4.194724e-08,1e-06,2.585677e-08,2.906549e-07,11.058271,1.937464e+47,9.953444,5.260247e+47,0.019375,0.052602,True,True,1.0,3,2.0,2000-06-06 00:17:00,True,Flare_20000606_0,2000-06-06 00:28:00,2000-06-06 00:43:00,2000-06-06 00:55:00,C4.6,C,4.6,C1-4.9,GOES,15.0,12.0,27.0,PeakTime,26.0,11.0,26.0,38.0
1,GOES_13,2000-06-06 00:25:00,GOES-16,4.753495e-08,1e-06,8.936986e-09,1.561961e-07,9.086959,1.344306e+47,8.010274,4.128245e+47,0.013443,0.041282,True,True,1.0,4,2.0,2000-06-06 00:25:00,True,Flare_20000606_0,2000-06-06 00:28:00,2000-06-06 00:43:00,2000-06-06 00:55:00,C4.6,C,4.6,C1-4.9,GOES,15.0,12.0,27.0,PeakTime,18.0,3.0,18.0,30.0
2,GOES_14,2000-06-06 00:26:00,GOES-16,5.612609e-08,1e-06,1.455322e-08,2.01082e-07,10.094641,1.496619e+47,8.974755,4.320921e+47,0.014966,0.043209,True,True,1.0,4,3.0,2000-06-06 00:26:00,True,Flare_20000606_0,2000-06-06 00:28:00,2000-06-06 00:43:00,2000-06-06 00:55:00,C4.6,C,4.6,C1-4.9,GOES,15.0,12.0,27.0,PeakTime,17.0,2.0,17.0,29.0
3,GOES_15,2000-06-06 00:27:00,GOES-16,6.899128e-08,2e-06,2.759433e-08,3.362277e-07,10.674135,2.33536e+47,9.55528,6.500427e+47,0.023354,0.065004,True,True,1.0,4,4.0,2000-06-06 00:27:00,True,Flare_20000606_0,2000-06-06 00:28:00,2000-06-06 00:43:00,2000-06-06 00:55:00,C4.6,C,4.6,C1-4.9,GOES,15.0,12.0,27.0,PeakTime,16.0,1.0,16.0,28.0
4,GOES_16,2000-06-06 00:28:00,GOES-16,8.993616e-08,2e-06,4.93028e-08,4.731387e-07,11.832619,2.934333e+47,10.791647,7.563609e+47,0.029343,0.075636,True,True,1.0,4,5.0,2000-06-06 00:28:00,True,Flare_20000606_0,2000-06-06 00:28:00,2000-06-06 00:43:00,2000-06-06 00:55:00,C4.6,C,4.6,C1-4.9,GOES,15.0,12.0,27.0,PeakTime,15.0,0.0,15.0,27.0


In [79]:
# Number of rows per Association_Type label in df_fai_assoc2
# (same query as an earlier cell; value_counts() leaves NaN out by default)
df_fai_assoc2["Association_Type"].value_counts()

Association_Type
PeakTime    7355
Inside       464
Name: count, dtype: int64

In [80]:
# Sorted distinct FAI_to_peak values in df_fai_assoc2 (NaN, if any, sorts last)
np.sort(df_fai_assoc2["FAI_to_peak"].unique())

array([  1.,   2.,   3.,   4.,   5.,   6.,   7.,   8.,   9.,  10.,  11.,
        12.,  13.,  14.,  15.,  16.,  17.,  18.,  19.,  20.,  21.,  22.,
        23.,  24.,  25.,  26.,  27.,  28.,  29.,  30.,  31.,  32.,  33.,
        34.,  35.,  36.,  37.,  38.,  39.,  40.,  41.,  42.,  43.,  44.,
        45.,  46.,  47.,  48.,  49.,  50.,  51.,  52.,  53.,  54.,  55.,
        56.,  57.,  58.,  59.,  60.,  61.,  62.,  63.,  64.,  65.,  66.,
        67.,  68.,  69.,  70.,  71.,  72.,  73.,  74.,  75.,  76.,  77.,
        78.,  79.,  80.,  81.,  82.,  83.,  84.,  85.,  86.,  87.,  88.,
        89.,  90.,  91.,  92.,  93.,  94.,  95.,  96.,  97.,  98.,  99.,
       103., 104., 105., 106., 107., 108., 109., 124., 125., 131., 132.,
       133., 134., 135., 136., 137., 138.,  nan])

In [81]:
# Number of rows in df_fai_assoc2 per FAI_to_peak value, most frequent first
df_fai_assoc2["FAI_to_peak"].value_counts()

FAI_to_peak
1.0     564
2.0     492
3.0     444
4.0     410
5.0     375
       ... 
99.0      1
95.0      1
92.0      1
94.0      1
93.0      1
Name: count, Length: 116, dtype: int64