# FITS File Handling Example

This notebook demonstrates how to use the shared utilities to work with FITS files that contain different flux column names.

In [None]:
# All notebook imports live in this single cell.
import sys
import os
import glob
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Add the project root to sys.path to import shared modules.
# NOTE: ".." is a relative entry, so Python resolves it against the kernel's
# current working directory. This presumably expects the notebook to be run
# from a folder one level below the project root — TODO confirm the layout.
sys.path.append("..")

# Import shared utilities (project-local `shared` package, found via the
# sys.path entry added above)
from shared.data_utils import load_flux_from_fits, normalize_flux, get_fits_header_info, list_fits_columns
from shared.data_utils.data_processing import load_all_fits_files

## 1. List Available FITS Files

In [None]:
# Find all FITS files in the data directory (recursively).
# glob returns matches in arbitrary, filesystem-dependent order, so the list
# is sorted to keep "the first few files" -- and every later cell that indexes
# into fits_files -- reproducible across machines and re-runs.
fits_files = sorted(glob.glob("../data/**/*.fits", recursive=True))
print(f"Found {len(fits_files)} FITS files")

# Display the first few
for fits_path in fits_files[:5]:
    print(fits_path)

## 2. Examine FITS File Columns

Let's check which columns are available in a sample of files to see the different flux-column naming conventions in use.

In [None]:
# Inspect at most five files: the full list when it is short, otherwise
# only its first five entries.
sample_files = fits_files if len(fits_files) < 5 else fits_files[:5]

for fits_path in sample_files:
    column_names = list_fits_columns(fits_path)
    print(f"\nFile: {os.path.basename(fits_path)}")
    print(f"Columns: {column_names}")

    # Record which of the two flux column names this file exposes
    pdcsap_present = 'PDCSAP_FLUX' in column_names
    flux_present = 'FLUX' in column_names

    # Report the combination, handling the "neither" case first
    if not (pdcsap_present or flux_present):
        print("  No flux columns found")
    elif pdcsap_present and flux_present:
        print("  Has both PDCSAP_FLUX and FLUX columns")
    elif pdcsap_present:
        print("  Has PDCSAP_FLUX column")
    else:
        print("  Has FLUX column only")

## 3. Load a Single FITS File with Flexible Column Handling

In [None]:
# Demonstrate loading on the first discovered file; nothing happens when the
# file list is empty.
if fits_files:
    file_path = fits_files[0]

    # Flux column names offered to the loader
    candidate_columns = ['PDCSAP_FLUX', 'FLUX', 'SAP_FLUX']
    time, flux, quality, flux_column_used = load_flux_from_fits(
        file_path,
        flux_columns=candidate_columns,
    )

    # A missing time or flux array is treated as a failed load
    if time is None or flux is None:
        print(f"Failed to load {file_path}")
    else:
        print(f"Successfully loaded {file_path}")
        print(f"Used flux column: {flux_column_used}")
        print(f"Number of data points: {len(time)}")

        # Normalize the flux before plotting
        flux_norm = normalize_flux(flux)

        # Draw the light curve on an explicit figure/axes pair
        fig, ax = plt.subplots(figsize=(12, 6))
        ax.plot(time, flux_norm, '.', markersize=2)
        ax.set_xlabel('Time (days)')
        ax.set_ylabel(f'Normalized Flux ({flux_column_used})')
        ax.set_title(f'Light Curve from {os.path.basename(file_path)}')
        plt.show()

## 4. Load All FITS Files with Either Column Type

In [None]:
# Load every FITS file matching the recursive data-directory pattern
light_curves = load_all_fits_files("../data/**/*.fits")
print(f"Successfully loaded {len(light_curves)} light curves")

# Tally how many light curves were read from each flux column
column_counts = {}
for curve in light_curves.values():
    used_column = curve['flux_column_used']
    column_counts.setdefault(used_column, 0)
    column_counts[used_column] += 1

print("\nFlux column usage:")
for used_column, n_files in column_counts.items():
    print(f"  {used_column}: {n_files} files")

## 5. Plot Examples of Both FITS Types

In [None]:
# Find the first light curve loaded from each flux column we want to compare.
# The dict keys double as the panel labels; a value of None means "no example
# found yet" for that column.
examples = {'PDCSAP_FLUX': None, 'FLUX': None}

for path, curve in light_curves.items():
    column = curve['flux_column_used']
    if column in examples and examples[column] is None:
        examples[column] = path

    # Stop scanning as soon as both examples are known
    if all(found is not None for found in examples.values()):
        break

# Keep the original notebook-level names available for later cells
pdcsap_example = examples['PDCSAP_FLUX']
flux_example = examples['FLUX']

# Plot both examples side by side if at least one is available. The two
# panels share one code path, so their styling cannot drift apart.
if any(found is not None for found in examples.values()):
    fig, axes = plt.subplots(1, 2, figsize=(16, 6))

    for ax, (label, example_path) in zip(axes, examples.items()):
        if example_path is None:
            # Keep the panel but say why it is empty
            ax.text(0.5, 0.5, f'No {label} example found',
                    ha='center', va='center', transform=ax.transAxes)
            continue

        curve = light_curves[example_path]
        ax.plot(curve['time'], curve['flux_norm'], '.', markersize=2)
        ax.set_xlabel('Time (days)')
        ax.set_ylabel('Normalized Flux')
        ax.set_title(f'{label} Example: {os.path.basename(example_path)}')

    plt.tight_layout()
    plt.show()
else:
    # Make the empty case visible instead of silently producing no output
    print("No PDCSAP_FLUX or FLUX examples found to plot")

## 6. Display Metadata Information

In [None]:
# Show the stored metadata for up to the first three loaded light curves
preview = list(light_curves.items())[:3]

for number, (path, curve) in enumerate(preview, start=1):
    print(f"\nFile {number}: {os.path.basename(path)}")
    print(f"Flux column: {curve['flux_column_used']}")
    print("Metadata:")
    for field, field_value in curve['metadata'].items():
        print(f"  {field}: {field_value}")

## 7. Conclusion

This notebook demonstrates how to use the shared utilities to handle FITS files with different flux column naming conventions. The key features include:

1. Automatically detecting and using whichever of the `PDCSAP_FLUX`, `FLUX`, or `SAP_FLUX` columns is available
2. Normalizing flux values for consistent analysis
3. Extracting metadata from FITS headers
4. Loading all FITS files in a directory with flexible column handling
5. Comparing light curves from different data sources

These utilities ensure our analysis can work with data from different telescopes and processing pipelines, regardless of the specific column naming used.