# In [None]:
"""
Flight Delay Analysis Visualization

This script reads aggregated flight delay data from Spark output files
and generates a line plot showing monthly delay trends from 1987 to 1996.

Requirements:
- pandas
- matplotlib
- q3/ directory containing part-* files with aggregated delay statistics
"""

import glob
import os
import pandas as pd
import matplotlib.pyplot as plt


def load_data(directory: str = "q3") -> pd.DataFrame | None:
    """
    Load aggregated data from Spark output files.
    
    Args:
        directory: Path to directory containing part-* files
        
    Returns:
        DataFrame with combined data or None if no files found
    """
    all_records = glob.iglob(os.path.join(directory, "part*"), recursive=True)
    dataframes = [pd.read_csv(f, sep="\t", header=None) for f in all_records]
    
    if not dataframes:
        return None
    
    return pd.concat(dataframes, ignore_index=True)


def process_data(df: pd.DataFrame) -> pd.DataFrame:
    """
    Process and clean the delay data.

    The input frame is left untouched; all work happens on a copy.

    Steps:
      1. Drop rows whose first column holds the literal text 'Month'
         (header rows that ended up in the data).
      2. Name the four columns.
      3. Convert the three delay columns to float and divide by 60
         (presumably minutes -> hours; confirm against the upstream job).
      4. Convert 'Month' to int and sort ascending by it.

    Args:
        df: Raw dataframe from load_data(); must have exactly four columns

    Returns:
        Processed dataframe with proper types and sorting
    """
    column_names = [
        'Month',
        'Top Arrival Delay',
        'Top Departure Delay',
        'Top (Arrival Delay + Departure Delay)'
    ]

    # Filter by position and take an explicit copy *before* renaming, so the
    # caller's frame is never mutated and later column assignments do not
    # operate on a slice of it (avoids SettingWithCopyWarning).
    cleaned = df[df.iloc[:, 0] != 'Month'].copy()
    cleaned.columns = column_names

    for col in column_names[1:]:
        cleaned[col] = cleaned[col].astype(float) / 60

    cleaned['Month'] = cleaned['Month'].astype(int)

    return cleaned.sort_values('Month')


def visualize_delays(df: pd.DataFrame, save_path: str | None = None) -> None:
    """
    Create and display/save the delay visualization.

    Draws the three delay columns against 'Month' as lines with circle
    markers on a single axes, optionally writes the figure to disk, and
    then shows it.

    Args:
        df: Processed dataframe containing 'Month' and the three
            'Top ...' delay columns
        save_path: Optional path to save figure as PNG; nothing is saved
            when it is None or empty
    """
    delay_columns = [
        'Top Arrival Delay',
        'Top Departure Delay',
        'Top (Arrival Delay + Departure Delay)',
    ]

    fig, ax = plt.subplots(figsize=(15, 5))

    # The '-o' style string already selects circle markers; also passing
    # marker='o' makes matplotlib warn about a redundant marker definition.
    df.plot(
        ax=ax,
        x='Month',
        y=delay_columns,
        kind='line',
        style='-o',
        linewidth=2
    )

    ax.set_title('Flight Delay Analysis (1987-1996)', fontsize=14, fontweight='bold')
    ax.set_xlabel('Month', fontsize=12)
    ax.set_ylabel('Delay (hours)', fontsize=12)
    ax.set_xticks(df['Month'])  # one tick per data point
    ax.grid(True, alpha=0.3)
    ax.legend(title='Delay Type', fontsize=10)

    if save_path:
        # Save via the figure object rather than pyplot's implicit
        # "current figure" so the right figure is always written.
        fig.savefig(save_path, dpi=300, bbox_inches='tight')
        print(f"Figure saved to {save_path}")

    plt.show()


def main():
    """Load the q3 output, clean it, and plot it to flight_delays.png."""
    raw = load_data()

    if raw is not None:
        # Data found: clean it and render/save the chart.
        visualize_delays(process_data(raw), save_path='flight_delays.png')
        return

    print("Error: No data files found in q3/ directory")

# Run the pipeline only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == '__main__':
    main()