# Solar Flare Analysis: Traditional Flare Detection

This notebook demonstrates traditional methods for detecting solar flares in GOES XRS data and identifying overlapping flares.

## Setup and Imports

In [None]:
import os
import sys
import numpy as np
import pandas as pd
import xarray as xr
import matplotlib.pyplot as plt
from datetime import datetime, timedelta

# Make the project's top-level packages importable from this notebook.
# `os.path.abspath('..')` resolves the parent of the *current working directory*,
# so this assumes the kernel was started from the notebook's own folder
# (one level below the project root) — TODO confirm for other launch setups.
project_root = os.path.abspath('..')
# Only append once so re-running this cell does not grow sys.path.
if project_root not in sys.path:
    sys.path.append(project_root)

# Import project modules
from config import settings
from src.data_processing.data_loader import load_goes_data, preprocess_xrs_data, remove_background
from src.flare_detection.traditional_detection import (
    detect_flare_peaks, define_flare_bounds, detect_overlapping_flares
)
from src.visualization.plotting import plot_xrs_time_series, plot_detected_flares

## Loading and Preprocessing Data

First, we'll load a sample GOES XRS data file and preprocess it:

In [None]:
# Locate sample GOES XRS data files (*.nc) in the configured data directory.
data_dir = settings.DATA_DIR

# os.listdir returns entries in arbitrary order, so sort the listing to make
# the choice of "first" sample file reproducible across runs and machines.
# A missing directory is treated like an empty one so the friendly message
# below is shown instead of a FileNotFoundError traceback.
if os.path.isdir(data_dir):
    sample_files = sorted(f for f in os.listdir(data_dir) if f.endswith('.nc'))
else:
    sample_files = []

if sample_files:
    data_file = os.path.join(data_dir, sample_files[0])
    print(f"Using {data_file} for demonstration")

    # Load data
    data = load_goes_data(data_file)

    # Preprocess B channel data
    channel = 'B'  # We'll focus on XRS-B (0.1-0.8 nm) channel
    flux_col = f'xrs{channel.lower()}'  # column name used by every later cell
    df = preprocess_xrs_data(data, channel=channel, remove_bad_data=True, interpolate_gaps=True)

    # Plot raw time series
    fig = plot_xrs_time_series(df, flux_col, title=f'GOES XRS {channel} Raw Data', log_scale=True)
    plt.tight_layout()
    plt.show()
else:
    # `df` stays undefined in this case; later cells check for it before running.
    print(f"No .nc files found. Please place GOES XRS data in '{data_dir}'.")

## Step 1: Remove Background Flux

To isolate flare events, we need to remove the background solar flux:

In [None]:
if 'df' in locals():
    # Background-estimation settings come from the project configuration.
    bg_params = settings.BACKGROUND_PARAMS
    df_bg = remove_background(
        df,
        window_size=bg_params['window_size'],
        quantile=bg_params['quantile'],
    )

    # Plot the column that holds the flux with the background removed.
    bg_removed_col = f'{flux_col}_no_background'
    bg_title = f'GOES XRS {channel} Background-Subtracted Data'
    fig = plot_xrs_time_series(df_bg, bg_removed_col, title=bg_title, log_scale=True)
    plt.tight_layout()
    plt.show()

## Step 2: Detect Flare Peaks

Let's detect peaks in the time series that might correspond to solar flares:

In [None]:
if 'df' in locals():
    # NOTE(review): detection runs on the preprocessed flux in `df`, not on the
    # background-subtracted frame `df_bg` built in Step 1 — confirm this is intended.
    detection_params = settings.DETECTION_PARAMS
    peaks = detect_flare_peaks(
        df, flux_col,
        threshold_factor=detection_params['threshold_factor'],
        window_size=detection_params['window_size'],
    )

    print(f"Detected {len(peaks)} potential flare peaks")
    display(peaks.head())

    # Overlay the detected peaks on the full time series.
    # `peak_index` is used positionally (via .iloc) to recover the peak timestamps.
    peak_rows = df.iloc[peaks['peak_index']]

    fig, ax = plt.subplots(figsize=(12, 6))
    ax.semilogy(df.index, df[flux_col], 'b-', label='XRS Flux')
    ax.semilogy(peak_rows.index, peaks['peak_flux'], 'ro', label='Detected Peaks')

    ax.grid(True, which='both', linestyle='--', alpha=0.5)
    ax.set_xlabel('Time')
    ax.set_ylabel('Flux (W/m²)')
    ax.set_title(f'GOES XRS {channel} - Detected Flare Peaks')
    ax.legend()
    fig.tight_layout()
    plt.show()

## Step 3: Define Flare Boundaries

For each detected peak, we need to determine the start and end times of the flare:

In [None]:
if 'df' in locals() and 'peaks' in locals():
    # Collect the boundary-related settings once and forward them as keywords.
    detection_params = settings.DETECTION_PARAMS
    bound_kwargs = {
        key: detection_params[key]
        for key in ('start_threshold', 'end_threshold', 'min_duration', 'max_duration')
    }

    # One row of `flares` is produced per peak that yields valid bounds.
    # NOTE(review): presumably peaks failing the duration limits are dropped — verify
    # against define_flare_bounds.
    peak_positions = peaks['peak_index'].values
    flares = define_flare_bounds(df, flux_col, peak_positions, **bound_kwargs)

    print(f"Defined bounds for {len(flares)} flares")
    display(flares.head())

    # Show the flare intervals on top of the time series.
    fig = plot_detected_flares(df, flux_col, flares)
    plt.tight_layout()
    plt.show()

## Step 4: Identify Overlapping Flares

Now, let's detect which flares are temporally overlapping:

In [None]:
if 'flares' in locals():
    # Detect overlapping flares. The result is iterated below as
    # (i, j, duration) triples, where i and j are row positions in `flares`.
    overlapping = detect_overlapping_flares(flares, min_overlap='2min')

    print(f"Detected {len(overlapping)} potentially overlapping flare pairs")

    if overlapping:
        print("\nOverlapping flare pairs:")
        for i, j, duration in overlapping:
            print(f"  Flares {i+1} and {j+1} overlap by {duration}")

        def _flare_segment(flare_pos):
            """Return the rows of `df` from a flare's start to its end (inclusive)."""
            row = flares.iloc[flare_pos]
            # A row taken from a mixed-dtype frame may hand back non-int scalars;
            # coerce so they are valid positional slice bounds.
            start_idx = int(row['start_index'])
            end_idx = int(row['end_index'])
            return df.iloc[start_idx:end_idx + 1]

        # Visualize overlapping flares
        fig, ax = plt.subplots(figsize=(14, 7))
        ax.semilogy(df.index, df[flux_col], 'k-', alpha=0.3, label='XRS Flux')

        # Every flare that takes part in at least one overlapping pair, once each.
        overlapping_indices = sorted({idx for i, j, _ in overlapping for idx in (i, j)})
        overlapping_set = set(overlapping_indices)

        # Draw non-overlapping flares in a single colour with one legend entry.
        non_overlapping_labelled = False
        for pos in range(len(flares)):
            if pos in overlapping_set:
                continue
            segment = _flare_segment(pos)
            ax.semilogy(segment.index, segment[flux_col], 'b-', alpha=0.7,
                        label="" if non_overlapping_labelled else 'Non-overlapping flares')
            non_overlapping_labelled = True

        # Draw each overlapping flare exactly once, with its own colour and its
        # own legend entry. Previously only the first pair was labelled, colours
        # were shared between neighbouring pairs, and a flare belonging to
        # several pairs was redrawn in different colours.
        colors = ['r', 'g', 'c', 'm', 'y']
        for order, idx in enumerate(overlapping_indices):
            segment = _flare_segment(idx)
            color = colors[order % len(colors)]
            ax.semilogy(segment.index, segment[flux_col], f'{color}-', alpha=0.7,
                        label=f'Flare {idx+1}')

        ax.grid(True, which='both', linestyle='--', alpha=0.5)
        ax.set_xlabel('Time')
        ax.set_ylabel('Flux (W/m²)')
        ax.set_title(f'GOES XRS {channel} - Overlapping Flares')
        ax.legend()
        fig.tight_layout()
        plt.show()

## Flare Classification

Now, let's classify the flares according to the standard GOES classification:

In [None]:
# GOES flare classes and the lower flux bound of each class (W/m²).
# Defined outside the guard so the later duration cell and any interactive
# use can rely on them regardless of whether flares were detected.
class_thresholds = {
    'A': 1e-8,
    'B': 1e-7,
    'C': 1e-6,
    'M': 1e-5,
    'X': 1e-4
}

# Classes in ascending order of flux; used to promote a label to the next class.
_GOES_CLASS_ORDER = ['A', 'B', 'C', 'M', 'X']


def classify_flare(flux: float) -> str:
    """Return the GOES class label (e.g. ``'C5.5'``) for a peak flux.

    Args:
        flux: Peak flux in W/m².

    Returns:
        ``'<A1.0'`` for flux below the A threshold, ``'Unknown'`` when the
        flux compares false against every threshold (e.g. NaN), otherwise the
        class letter followed by the multiplier rounded to one decimal.
        A multiplier that rounds up to 10.0 is reported as ``1.0`` of the next
        class (``'M1.0'`` rather than ``'C10.0'``); X has no upper class, so
        it keeps multipliers of 10 and above.
    """
    if flux < class_thresholds['A']:
        return '<A1.0'
    for pos in range(len(_GOES_CLASS_ORDER) - 1, -1, -1):
        cls = _GOES_CLASS_ORDER[pos]
        if flux >= class_thresholds[cls]:
            magnitude = round(flux / class_thresholds[cls], 1)
            # Rounding can push e.g. 9.96 to 10.0, which is not a valid
            # multiplier within a class; roll over into the next class.
            if magnitude >= 10 and cls != 'X':
                cls = _GOES_CLASS_ORDER[pos + 1]
                magnitude = 1.0
            return f"{cls}{magnitude:.1f}"
    return 'Unknown'


if 'flares' in locals():
    # Add classification to each flare (adds/overwrites the 'classification' column).
    flares['classification'] = flares['peak_flux'].apply(classify_flare)

    # Display flare information with classification
    flares_info = flares[['start_time', 'peak_time', 'end_time', 'peak_flux', 'classification']].copy()
    flares_info['duration'] = flares_info['end_time'] - flares_info['start_time']
    display(flares_info.sort_values('peak_flux', ascending=False))

    if flares.empty:
        # `.str[0]` and the bar plot both fail on an empty table.
        print("No flares detected; skipping class distribution plot.")
    else:
        # Count flares by class letter (first character of the label).
        class_counts = flares['classification'].str[0].value_counts().sort_index()

        # Plot flare class distribution
        plt.figure(figsize=(8, 5))
        class_counts.plot(kind='bar', color='skyblue')
        plt.xlabel('Flare Class')
        plt.ylabel('Count')
        plt.title('Distribution of Flare Classes')
        plt.grid(axis='y', linestyle='--', alpha=0.7)
        plt.tight_layout()
        plt.show()

## Flare Duration Analysis

Let's analyze the duration of the detected flares:

In [None]:
if 'flares' in locals():
    # Calculate durations in minutes with a vectorised subtraction instead of a
    # row-wise apply; this also works when the flare table is empty.
    # pd.to_timedelta normalises the difference in case it comes back as
    # object dtype rather than timedelta64.
    flares['duration_minutes'] = (
        pd.to_timedelta(flares['end_time'] - flares['start_time']).dt.total_seconds() / 60
    )

    if flares.empty:
        # Nothing to plot; a log-scaled scatter of no points is not meaningful.
        print("No flares detected; skipping duration analysis.")
    else:
        median_duration = flares['duration_minutes'].median()

        # Plot duration distribution
        fig, ax = plt.subplots(figsize=(10, 6))
        ax.hist(flares['duration_minutes'], bins=20, alpha=0.7, color='dodgerblue')
        ax.axvline(median_duration, color='red', linestyle='--',
                   label=f'Median: {median_duration:.1f} min')

        ax.set_xlabel('Duration (minutes)')
        ax.set_ylabel('Number of Flares')
        ax.set_title('Distribution of Flare Durations')
        ax.grid(linestyle='--', alpha=0.7)
        ax.legend()
        fig.tight_layout()
        plt.show()

        # Duration vs peak flux
        fig, ax = plt.subplots(figsize=(10, 6))
        ax.scatter(flares['peak_flux'], flares['duration_minutes'],
                   alpha=0.7, edgecolor='k', s=50)
        ax.set_xscale('log')
        ax.set_yscale('log')

        ax.set_xlabel('Peak Flux (W/m²)')
        ax.set_ylabel('Duration (minutes)')
        ax.set_title('Flare Duration vs. Peak Flux')
        ax.grid(True, which='both', linestyle='--', alpha=0.5)

        # Add vertical lines for class boundaries.
        # `class_thresholds` comes from the classification cell, which must
        # have been run first.
        label_y = ax.get_ylim()[0] * 1.1  # just above the bottom of the axes
        for cls, threshold in class_thresholds.items():
            ax.axvline(threshold, color='gray', linestyle='--', alpha=0.5)
            ax.text(threshold * 1.1, label_y, cls, rotation=90, alpha=0.8)

        fig.tight_layout()
        plt.show()

## Summary

In this notebook, we've demonstrated:

1. Traditional methods for detecting solar flares in GOES XRS data
2. How to define flare boundaries (start and end times)
3. How to identify overlapping flares that require special processing
4. Classification of flares according to the standard GOES system
5. Analysis of flare durations and their relationship with peak flux

In the next notebook, we'll explore ML-based methods for separating overlapping flares.