In [None]:
import numpy as np 
from matplotlib import pyplot as plt

from dotenv import load_dotenv
load_dotenv()

import os 

# Project root directory, read from the environment (None when the variable
# is not set); echoed so the notebook output records which root was used.
PROJECT_ROOT = os.environ.get('PROJECT_ROOT')
print('PROJECT_ROOT: {}'.format(PROJECT_ROOT))

In [2]:
import pandas as pd
import cartopy.crs as ccrs
import cartopy.feature as cfeature
import matplotlib.pyplot as plt
import os
from pathlib import Path

def plot_flight_segments(csv_path, output_path, extent=(-10, 40, 35, 70)):
    """
    Draw every flight segment of a CSV file on a map and save it as an image.

    Each row of the CSV becomes one semi-transparent red line from
    (from_lon, from_lat) to (to_lon, to_lat). The coordinates are plotted
    with a PlateCarree transform, i.e. they are treated as longitude/latitude.

    Args:
        csv_path: Path to a CSV file containing the columns ``from_lon``,
            ``from_lat``, ``to_lon`` and ``to_lat``.
        output_path: Where the rendered figure is written (300 dpi, tight
            bounding box).
        extent: Visible map window as ``(min_lon, max_lon, min_lat, max_lat)``.
            The default is the fixed Europe window this notebook has always
            used: roughly Portugal to Ukraine/Russia, Mediterranean to
            Norway/Sweden. The window is NOT derived from the data.

    Raises:
        KeyError: If one of the four coordinate columns is missing.
    """
    df = pd.read_csv(csv_path)

    # Pull the coordinate columns before a figure exists, so a malformed file
    # fails fast without leaving anything open.
    from_lon, to_lon = df['from_lon'], df['to_lon']
    from_lat, to_lat = df['from_lat'], df['to_lat']

    # Loop-invariant: one CRS object is enough for every segment.
    data_crs = ccrs.PlateCarree()

    fig = plt.figure(figsize=(12, 8))
    try:
        ax = fig.add_subplot(1, 1, 1, projection=ccrs.PlateCarree())

        # Background map features
        ax.add_feature(cfeature.LAND)
        ax.add_feature(cfeature.OCEAN)
        ax.add_feature(cfeature.COASTLINE)
        ax.add_feature(cfeature.BORDERS, linestyle=':')

        # One line per segment; the low alpha lets overlapping lines build up
        # visually.
        for lon0, lon1, lat0, lat1 in zip(from_lon, to_lon, from_lat, to_lat):
            ax.plot([lon0, lon1],
                    [lat0, lat1],
                    color='red',
                    linewidth=1,
                    transform=data_crs,
                    alpha=0.1)

        ax.set_extent(list(extent), crs=data_crs)
        ax.gridlines(draw_labels=True)

        fig.savefig(output_path, bbox_inches='tight', dpi=300)
    finally:
        # Always release the figure: callers that catch errors and carry on
        # with the next file would otherwise accumulate open figures.
        plt.close(fig)

# def main():
#     # Define paths
#     project_root = Path("PROJECT_ROOT")
#     input_dir = project_root / "data" / "hourly"
#     output_dir = project_root / "data" / "plot"
    
#     # Create output directory if it doesn't exist
#     output_dir.mkdir(parents=True, exist_ok=True)
    
#     # Process each CSV file in the input directory
#     for csv_file in input_dir.glob("*.csv"):
#         output_file = output_dir / f"{csv_file.stem}.png"
#         print(f"Processing {csv_file.name}...")
#         plot_flight_segments(csv_file, output_file)
#         print(f"Saved plot to {output_file}")

# if __name__ == "__main__":
#     main()

In [3]:
from mpire import WorkerPool
import os
import glob

# Ensure that plot_flight_segments is defined or imported
# For example:
# from your_module import plot_flight_segments

def process_single_csv(csv_file, output_dir):
    """
    Render one CSV file to a PNG that shares its base name.

    Files whose name starts with '._' are skipped. Progress is reported on
    stdout, and any ``Exception`` raised while handling the file is caught
    and printed instead of being propagated, so one bad file does not abort
    a batch.

    Args:
        csv_file (str): Path to the input CSV file.
        output_dir (str): Directory that receives ``<base name>.png``.

    Returns:
        None
    """
    try:
        file_name = os.path.basename(csv_file)

        # '._' prefixed names (e.g., macOS hidden files) are not real inputs.
        if file_name.startswith('._'):
            print(f"Skipping hidden file: {csv_file}")
            return

        # Swap only the final extension for '.png'.
        stem, _ = os.path.splitext(file_name)
        output_file = os.path.join(output_dir, stem + ".png")

        print(f"Processing {csv_file}...")
        plot_flight_segments(csv_file, output_file)
        print(f"Saved plot to {output_file}")

    except Exception as e:
        print(f"Error processing {csv_file}: {e}")

def process_csv_files():
    """
    Plot every CSV under ``$PROJECT_ROOT/data/hourly`` in parallel.

    One PNG per input file is written to ``$PROJECT_ROOT/data/plot``, which
    is created when missing. Each file is handled by ``process_single_csv``
    inside an mpire ``WorkerPool``.

    Raises:
        EnvironmentError: If the ``PROJECT_ROOT`` environment variable is
            unset or empty.
    """
    root = os.getenv('PROJECT_ROOT')
    if not root:
        raise EnvironmentError("Environment variable 'PROJECT_ROOT' is not set.")

    data_dir = os.path.join(root, 'data')
    source_dir = os.path.join(data_dir, 'hourly')
    plot_dir = os.path.join(data_dir, 'plot')

    # The plot directory must exist before any worker tries to save into it.
    os.makedirs(plot_dir, exist_ok=True)

    pending = glob.glob(os.path.join(source_dir, "*.csv"))
    if len(pending) == 0:
        print("No CSV files found to process.")
        return

    print(f'Processing {len(pending)} files...')

    # Each tuple carries the (csv_file, output_dir) arguments for one call;
    # the output directory is simply repeated for every file.
    jobs = [(path, plot_dir) for path in pending]

    # n_jobs=None leaves the worker count to mpire's default, which per its
    # documentation is the number of available CPU cores.
    with WorkerPool(n_jobs=None) as pool:
        pool.map(process_single_csv, jobs, progress_bar=True)


In [None]:
process_csv_files()