In [1]:
"""
Flight Route Weather Prediction System
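Uses inverse-distance-weighted interpolation over the three nearest stations
(labelled "barycentric interpolation" in the program banner) to predict
weather along flight routes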

Input: Source station ID + Destination station ID
Output: Weather predictions at waypoints along the route
"""

import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import warnings
warnings.filterwarnings('ignore')

class FlightWeatherPredictor:
    """
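    Predicts weather conditions along flight routes by inverse-distance-weighted
    interpolation over the three nearest stations (labelled "barycentric
    interpolation" in the program banner)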
    """

    def __init__(self, data_path='training_data_cleaned.csv'):
        """
        Initialize predictor with training data

        Args:
            data_path: Path to your CSV file with weather data
        """
        print("Loading training data...")

        # --- FIX 1: Use the correct semicolon delimiter ---
        try:
            self.data = pd.read_csv(data_path, sep=';')
        except pd.errors.ParserError as e:
            print(f"FATAL ERROR: Failed to load data. Check delimiter and file format. Error: {e}")
            raise

        # --- FIX 2: Data Cleaning for Coordinates and Station ID ---
        print("Cleaning and standardizing data columns...")

        # Standardize station ID to uppercase string (important for lookups)
        if 'station' in self.data.columns:
            self.data['station'] = self.data['station'].astype(str).str.upper()

        # Convert latitude/longitude to float and de-scale (assuming they were multiplied by 10^6)
        if 'latitude' in self.data.columns and 'longitude' in self.data.columns:
            # Convert to numeric, coercing errors to NaN
            self.data['latitude'] = pd.to_numeric(self.data['latitude'], errors='coerce')
            self.data['longitude'] = pd.to_numeric(self.data['longitude'], errors='coerce')

            # De-scale the coordinates
            if (self.data['latitude'].abs() > 1000).any():
                self.data['latitude'] /= 1000000.0
            if (self.data['longitude'].abs() > 1000).any():
                self.data['longitude'] /= 1000000.0

        # Drop any rows where coordinates are still missing or invalid after cleaning
        self.data.dropna(subset=['latitude', 'longitude'], inplace=True)

        # Display dataset structure
        print(f"\nDataset loaded: {len(self.data)} records")
        print(f"Columns: {list(self.data.columns)}")
        print(f"\nFirst few rows:")
        print(self.data.head())

        # Extract unique stations
        self.stations = self._extract_station_info()
        print(f"\n✓ Found {len(self.stations)} unique weather stations")

    def _extract_station_info(self):
        """
        Extract station information from the dataset
        Assumes columns like: station_id, latitude, longitude, altitude
        Adjust column names based on your actual CSV structure
        """
        # Try common column name variations
        possible_id_cols = ['station_id', 'ICAO', 'icao', 'station', 'aerodrome']
        possible_lat_cols = ['latitude', 'lat', 'Latitude']
        possible_lon_cols = ['longitude', 'lon', 'long', 'Longitude']

        # --- FIX 3: Add actual altitude column name for better detection ---
        possible_alt_cols = ['altitude', 'elevation', 'alt', 'Altitude', 'altitude_surface_ft']

        # Find actual column names
        id_col = next((col for col in possible_id_cols if col in self.data.columns), None)
        lat_col = next((col for col in possible_lat_cols if col in self.data.columns), None)
        lon_col = next((col for col in possible_lon_cols if col in self.data.columns), None)
        alt_col = next((col for col in possible_alt_cols if col in self.data.columns), None)

        if not all([id_col, lat_col, lon_col]):
            print("\n⚠ Warning: Could not auto-detect primary column names.")
            print("Available columns:", list(self.data.columns))
            # Fallback to interactive input is left out to keep the code runnable
            raise ValueError("Required columns (station ID, lat, lon) not found or specified.")

        # Extract unique stations with their coordinates
        if alt_col and alt_col in self.data.columns:
            stations = self.data[[id_col, lat_col, lon_col, alt_col]].drop_duplicates(subset=[id_col])
            stations.columns = ['station_id', 'latitude', 'longitude', 'altitude']
        else:
            stations = self.data[[id_col, lat_col, lon_col]].drop_duplicates(subset=[id_col])
            stations.columns = ['station_id', 'latitude', 'longitude']
            stations['altitude'] = 0  # Default altitude if not available

        # Ensure coordinates are numeric before setting index
        stations['latitude'] = pd.to_numeric(stations['latitude'], errors='coerce')
        stations['longitude'] = pd.to_numeric(stations['longitude'], errors='coerce')

        return stations.set_index('station_id')

    def list_available_stations(self):
        """Display all available stations"""
        print("\n" + "="*70)
        print("AVAILABLE WEATHER STATIONS")
        print("="*70)
        print(f"{'Station ID':<15} {'Latitude':<12} {'Longitude':<12} {'Altitude (ft)':<12}")
        print("-"*70)
        # Limit to the first 10 for a cleaner display unless user specifically requests all
        count = 0
        for station_id, row in self.stations.head(10).iterrows():
            print(f"{station_id:<15} {row['latitude']:<12.4f} {row['longitude']:<12.4f} {row['altitude']:<12.0f}")
            count += 1

        if len(self.stations) > 10:
             print(f"... and {len(self.stations) - 10} more stations.")

        print("="*70)

    def haversine_distance(self, lat1, lon1, lat2, lon2):
        """Calculate great circle distance in kilometers"""
        R = 6371.0  # Earth radius in km
        lat1_rad, lat2_rad = np.radians(lat1), np.radians(lat2)
        delta_lat = np.radians(lat2 - lat1)
        delta_lon = np.radians(lon2 - lon1)

        a = (np.sin(delta_lat / 2) ** 2 +
             np.cos(lat1_rad) * np.cos(lat2_rad) * np.sin(delta_lon / 2) ** 2)
        c = 2 * np.arctan2(np.sqrt(a), np.sqrt(1 - a))
        return R * c

    def find_nearest_stations(self, lat, lon, n=3):
        """
        Find n nearest weather stations to given coordinates

        Returns:
            DataFrame with nearest stations and their distances
        """
        distances = []
        for station_id, row in self.stations.iterrows():
            dist = self.haversine_distance(lat, lon, row['latitude'], row['longitude'])
            distances.append({
                'station_id': station_id,
                'distance_km': dist,
                'latitude': row['latitude'],
                'longitude': row['longitude'],
                'altitude': row['altitude']
            })

        df = pd.DataFrame(distances).sort_values('distance_km')
        return df.head(n)

    def get_station_weather(self, station_id, timestamp=None):
        """
        Get most recent weather observation for a station

        Args:
            station_id: Station identifier
            timestamp: Optional specific timestamp (uses most recent if None)

        Returns:
            Dictionary with weather parameters
        """
        # Find column name for station ID
        id_col = [col for col in self.data.columns if 'station' in col.lower() or 'icao' in col.lower()][0]

        # Filter data for this station
        station_data = self.data[self.data[id_col] == station_id]

        if len(station_data) == 0:
            return None

        # Get most recent observation (or use timestamp if provided)
        if timestamp is None:
            latest = station_data.iloc[-1]
        else:
            # Simplified: just use most recent for now
            latest = station_data.iloc[-1]

        # Extract weather parameters (adjust column names based on your data)
        weather = {}

        # Common weather parameter column names (updated to match CSV data)
        param_mappings = {
            'temperature': ['temperature_c', 'temperature', 'temp', 'Temperature', 'temp_c'],
            'pressure': ['pressure_hpa', 'pressure', 'press', 'Pressure'],
            'wind_speed': ['wind_speed_kt', 'wind_speed', 'windspeed', 'Wind_Speed', 'wspd'],
            'wind_direction': ['wind_direction_deg', 'wind_direction', 'wind_dir', 'Wind_Direction', 'wdir'],
            'visibility': ['visibility_sm', 'visibility', 'vis', 'Visibility'],
            'cloud_ceiling': ['cloud_ceiling_ft', 'cloud_ceiling', 'ceiling', 'Cloud_Ceiling', 'clouds']
        }

        for param, possible_cols in param_mappings.items():
            col = next((c for c in possible_cols if c in self.data.columns), None)
            if col:
                weather[param] = latest[col]
            else:
                weather[param] = None

        return weather

    def interpolate_circular(self, angles, weights):
        """
        Interpolate circular data (wind direction)
        Handles the fact that 359° and 1° should average to 0°, not 180°
        """
        angles_rad = np.radians(angles)
        x = np.sum(weights * np.cos(angles_rad))
        y = np.sum(weights * np.sin(angles_rad))
        result = np.degrees(np.arctan2(y, x))
        return result % 360

    def interpolate_weather(self, lat, lon, altitude=None):
        """
        Interpolate weather at any point using barycentric interpolation

        Args:
            lat: Latitude of query point
            lon: Longitude of query point
            altitude: Altitude in feet (optional)

        Returns:
            Dictionary with interpolated weather + metadata
        """
        # Find 3 nearest stations
        nearest = self.find_nearest_stations(lat, lon, n=3)

        # Calculate inverse distance weights
        epsilon = 0.001  # Avoid division by zero
        inv_distances = 1.0 / (nearest['distance_km'].values + epsilon) ** 2
        weights = inv_distances / np.sum(inv_distances)

        # Get weather data from each station
        weather_data = []
        for station_id in nearest['station_id']:
            weather = self.get_station_weather(station_id)
            if weather:
                weather_data.append(weather)

        if len(weather_data) < 3:
            print(f"Warning: Only {len(weather_data)} stations available for interpolation.")
            return None

        # Interpolate each parameter
        result = {
            'latitude': lat,
            'longitude': lon,
            'altitude': altitude if altitude else 0
        }

        # Linear parameters
        for param in ['temperature', 'pressure', 'wind_speed', 'visibility', 'cloud_ceiling']:
            values = [w[param] for w in weather_data if w[param] is not None and pd.notna(w[param])]
            if len(values) == 3:
                result[param] = np.sum(weights * np.array(values))
            else:
                result[param] = None

        # Circular parameter (wind direction)
        wind_dirs = [w['wind_direction'] for w in weather_data if w['wind_direction'] is not None and pd.notna(w['wind_direction'])]
        if len(wind_dirs) == 3:
            result['wind_direction'] = self.interpolate_circular(np.array(wind_dirs), weights)
        else:
            result['wind_direction'] = None

        # Altitude correction (if target altitude is different)
        if altitude and altitude > 0:
            avg_station_alt = nearest['altitude'].mean()
            altitude_diff_ft = altitude - avg_station_alt

            # Temperature lapse rate: -1.98°C per 1000 ft
            if result['temperature'] is not None:
                result['temperature'] += -1.98 * (altitude_diff_ft / 1000.0)

            # Pressure: approximately -1 hPa per 30 ft near sea level (simplistic)
            if result['pressure'] is not None:
                result['pressure'] += -1.0 * (altitude_diff_ft / 30.0)

        # Add metadata
        result['nearest_stations'] = nearest['station_id'].tolist()
        result['station_distances'] = nearest['distance_km'].tolist()
        result['interpolation_weights'] = weights.tolist()

        return result

    def generate_route_waypoints(self, source_id, dest_id, num_waypoints=10):
        """
        Generate waypoints along route from source to destination

        Args:
            source_id: Source station ID
            dest_id: Destination station ID
            num_waypoints: Number of intermediate points to generate

        Returns:
            List of (lat, lon, altitude) waypoints
        """
        if source_id not in self.stations.index:
            raise ValueError(f"Source station '{source_id}' not found")
        if dest_id not in self.stations.index:
            raise ValueError(f"Destination station '{dest_id}' not found")

        source = self.stations.loc[source_id]
        dest = self.stations.loc[dest_id]

        # Calculate route distance
        distance_km = self.haversine_distance(
            source['latitude'], source['longitude'],
            dest['latitude'], dest['longitude']
        )

        # Generate waypoints (linear interpolation for simplicity)
        waypoints = []
        for i in range(num_waypoints + 2):  # Include source and destination
            fraction = i / (num_waypoints + 1)

            lat = source['latitude'] + fraction * (dest['latitude'] - source['latitude'])
            lon = source['longitude'] + fraction * (dest['longitude'] - source['longitude'])

            # Simulate cruise altitude profile (climb, cruise, descend)
            CRUISE_ALT = 25000 # Example cruise altitude in ft
            CLIMB_FRAC = 0.2
            DESCENT_FRAC = 0.8

            if fraction < CLIMB_FRAC:  # Climb phase
                altitude = source['altitude'] + (fraction / CLIMB_FRAC) * (CRUISE_ALT - source['altitude'])
            elif fraction < DESCENT_FRAC:  # Cruise phase
                altitude = CRUISE_ALT
            else:  # Descent phase
                altitude = CRUISE_ALT - ((fraction - DESCENT_FRAC) / (1 - DESCENT_FRAC)) * (CRUISE_ALT - dest['altitude'])

            waypoints.append({
                'waypoint_number': i + 1,
                'latitude': lat,
                'longitude': lon,
                'altitude': altitude,
                'distance_from_source': distance_km * fraction
            })

        return waypoints, distance_km

    def predict_route_weather(self, source_id, dest_id, num_waypoints=10):
        """
        Predict weather along entire flight route

        Args:
            source_id: Source station ID (e.g., 'CYVR')
            dest_id: Destination station ID (e.g., 'CYYC')
            num_waypoints: Number of intermediate waypoints

        Returns:
            DataFrame with weather predictions at each waypoint
        """
        print(f"\n{'='*80}")
        print(f"FLIGHT ROUTE WEATHER PREDICTION")
        print(f"{'='*80}")
        print(f"Route: {source_id} → {dest_id}")

        # Generate waypoints
        waypoints, total_distance = self.generate_route_waypoints(source_id, dest_id, num_waypoints)
        print(f"Total Distance: {total_distance:.1f} km")
        print(f"Number of Waypoints: {len(waypoints)}")
        print(f"\n{'='*80}")

        # Predict weather at each waypoint
        predictions = []
        for wp in waypoints:
            weather = self.interpolate_weather(
                wp['latitude'],
                wp['longitude'],
                wp['altitude']
            )

            if weather:
                predictions.append({
                    **wp,
                    **{k: v for k, v in weather.items()
                       if k not in ['latitude', 'longitude', 'altitude']}
                })

        return pd.DataFrame(predictions)


def format_weather_display(predictions_df):
    """
    Print route predictions as a formatted table plus interpolation details.

    The table shows one row per waypoint with missing values rendered as
    "N/A". Interpolation details (nearest stations, distances, weights) are
    then printed for the first, middle and last waypoints, each at most once.

    Args:
        predictions_df: DataFrame of waypoint predictions; only the columns
            that are present are displayed

    Returns:
        None. All output goes to stdout.
    """
    if predictions_df.empty:
        print("\nNo weather predictions available.")
        return

    def _formatter(spec, suffix):
        # Build a cell formatter that renders missing values as "N/A"
        def _apply(value):
            return f"{value:{spec}}{suffix}" if pd.notna(value) else "N/A"
        return _apply

    print("\n" + "="*150)
    print("WEATHER FORECAST ALONG ROUTE")
    print("="*150)

    # Display summary columns, filtered to only those that exist
    display_cols = ['waypoint_number', 'distance_from_source', 'altitude',
                    'temperature', 'pressure', 'wind_direction', 'wind_speed',
                    'visibility', 'cloud_ceiling']
    display_cols = [col for col in display_cols if col in predictions_df.columns]

    df_display = predictions_df[display_cols].copy()

    # (source column, display column, number format, unit suffix)
    simple_columns = [
        ('distance_from_source', 'distance_km', '.0f', 'km'),
        ('altitude', 'altitude_ft', '.0f', 'ft'),
        ('temperature', 'temp_c', '.1f', '°C'),
        ('pressure', 'pressure_hpa', '.1f', ' hPa'),
        ('visibility', 'vis_sm', '.1f', ' SM'),
        ('cloud_ceiling', 'ceiling', '.0f', ' ft'),
    ]
    for source_col, display_col, spec, suffix in simple_columns:
        if source_col in df_display.columns:
            df_display[display_col] = df_display[source_col].apply(_formatter(spec, suffix))

    # Wind is shown as a single "direction@speed" cell and needs both values
    if 'wind_direction' in df_display.columns and 'wind_speed' in df_display.columns:
        df_display['wind'] = df_display.apply(
            lambda row: f"{row['wind_direction']:.0f}°@{row['wind_speed']:.0f}kt"
            if pd.notna(row['wind_direction']) and pd.notna(row['wind_speed']) else "N/A",
            axis=1
        )

    # Select final display columns
    final_cols = ['waypoint_number', 'distance_km', 'altitude_ft', 'temp_c', 'pressure_hpa', 'wind', 'vis_sm', 'ceiling']
    final_cols = [col for col in final_cols if col in df_display.columns]

    print(df_display[final_cols].to_string(index=False))
    print("="*150)

    # Show interpolation details for a few waypoints
    print("\nINTERPOLATION DETAILS (Sample Waypoints):")
    print("-"*150)
    # First, middle and last rows; the set removes duplicates so that short
    # frames (one or two rows) do not print the same waypoint twice
    row_count = len(predictions_df)
    sample_indices = sorted({0, row_count // 2, row_count - 1})

    for idx in sample_indices:
        row = predictions_df.iloc[idx]
        print(f"\nWaypoint {row['waypoint_number']}: ({row['latitude']:.3f}, {row['longitude']:.3f}) @ {row['altitude']:.0f}ft")
        if 'nearest_stations' in row:
            print(f"  Nearest stations: {row['nearest_stations']}")
            if 'station_distances' in row:
                print(f"  Distances: {[f'{d:.1f}km' for d in row['station_distances']]}")
            if 'interpolation_weights' in row:
                print(f"  Weights: {[f'{w:.3f}' for w in row['interpolation_weights']]}")
    print("="*150)


def main():
    """
    Main function - Interactive weather prediction system

    Loads 'training_data_cleaned.csv', then loops over a text menu that lists
    stations, predicts weather along a route between two stations (optionally
    saving the result to CSV), or predicts weather at a custom location.
    Returns when the user chooses Exit or when the data cannot be loaded.
    """
    def _reading(value, spec, unit=""):
        # Format a numeric reading; missing values (None/NaN) become "N/A"
        # instead of raising on the numeric format spec
        if value is None or pd.isna(value):
            return "N/A"
        return f"{value:{spec}}{unit}"

    print("     FLIGHT ROUTE WEATHER PREDICTION SYSTEM")
    print("     Using Barycentric Interpolation")

    # Initialize predictor
    try:
        predictor = FlightWeatherPredictor('training_data_cleaned.csv')
    except Exception as e:
        print(f"\n FAILED TO INITIALIZE PREDICTOR: {e}")
        return

    while True:
        print("\n" + "="*80)
        print("MAIN MENU")
        print("="*80)
        print("1. List available weather stations (Top 10)")
        print("2. Predict weather along flight route")
        print("3. Predict weather at custom location")
        print("4. Exit")
        print("="*80)

        choice = input("\nEnter your choice (1-4): ").strip()

        if choice == '1':
            predictor.list_available_stations()

        elif choice == '2':
            print("\n--- ROUTE WEATHER PREDICTION ---")
            predictor.list_available_stations()

            source_id = input("\nEnter source station ID (e.g., CYVR): ").strip().upper()
            dest_id = input("Enter destination station ID (e.g., CYYC): ").strip().upper()

            # Non-numeric or negative counts fall back to the default
            try:
                num_waypoints = int(input("Number of intermediate waypoints (default 10): ") or "10")
                if num_waypoints < 0:
                    raise ValueError("waypoint count must not be negative")
            except ValueError:
                num_waypoints = 10

            try:
                predictions = predictor.predict_route_weather(source_id, dest_id, num_waypoints)
                format_weather_display(predictions)

                # Option to save
                save = input("\nSave predictions to CSV? (y/n): ").strip().lower()
                if save == 'y':
                    filename = f"weather_prediction_{source_id}_{dest_id}.csv"
                    predictions.to_csv(filename, index=False)
                    print(f"✓ Saved to {filename}")

            except Exception as e:
                # Top-level boundary of the interactive loop: report and continue
                print(f"\n✗ Error: {e}")

        elif choice == '3':
            print("\n--- CUSTOM LOCATION PREDICTION ---")
            try:
                lat = float(input("Enter latitude: "))
                lon = float(input("Enter longitude: "))
                alt = float(input("Enter altitude (ft, press Enter for surface): ") or "0")

                weather = predictor.interpolate_weather(lat, lon, alt)

                if weather:
                    print("\n" + "="*80)
                    print("PREDICTED WEATHER")
                    print("="*80)
                    print(f"Location: ({lat:.4f}, {lon:.4f}) @ {alt:.0f} ft")
                    print(f"\nTemperature: {_reading(weather.get('temperature'), '.1f', '°C')}")
                    print(f"Pressure: {_reading(weather.get('pressure'), '.1f', ' hPa')}")

                    # Wind is only meaningful when both components are known
                    wind_dir = weather.get('wind_direction')
                    wind_spd = weather.get('wind_speed')
                    if _reading(wind_dir, '.0f') == "N/A" or _reading(wind_spd, '.0f') == "N/A":
                        print("Wind: N/A")
                    else:
                        print(f"Wind: {wind_dir:.0f}° @ {wind_spd:.0f} kt")

                    print(f"Visibility: {_reading(weather.get('visibility'), '.1f', ' SM')}")
                    print(f"Cloud Ceiling: {_reading(weather.get('cloud_ceiling'), '.0f', ' ft AGL')}")

                    print(f"\nNearest Stations: {weather.get('nearest_stations', [])}")
                    print(f"Distances: {[f'{d:.1f}km' for d in weather.get('station_distances', [])]}")
                    print("="*80)
                else:
                    print("✗ Could not predict weather at this location (Need 3 valid stations)")

            except Exception as e:
                # Top-level boundary of the interactive loop: report and continue
                print(f"\n✗ Error: {e}")

        elif choice == '4':
            print("\n Thank you for using the Flight Weather Prediction System!")
            print("  Safe flights! \n")
            break

        else:
            print("✗ Invalid choice. Please enter 1-4.")


# Start the interactive menu only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()


🛩️ 🛩️ 🛩️ 🛩️ 🛩️ 🛩️ 🛩️ 🛩️ 🛩️ 🛩️ 🛩️ 🛩️ 🛩️ 🛩️ 🛩️ 🛩️ 🛩️ 🛩️ 🛩️ 🛩️ 
     FLIGHT ROUTE WEATHER PREDICTION SYSTEM
     Using Barycentric Interpolation
🛩️ 🛩️ 🛩️ 🛩️ 🛩️ 🛩️ 🛩️ 🛩️ 🛩️ 🛩️ 🛩️ 🛩️ 🛩️ 🛩️ 🛩️ 🛩️ 🛩️ 🛩️ 🛩️ 🛩️ 

Loading training data...
Cleaning and standardizing data columns...

Dataset loaded: 68159 records
Columns: ['timestamp', 'date', 'time', 'station', 'latitude', 'longitude', 'altitude_surface_ft', 'temperature_c', 'pressure_hpa', 'wind_direction_deg', 'wind_speed_kt', 'wind_gust_kt', 'visibility_sm', 'cloud_ceiling_ft']

First few rows:
          timestamp        date      time station   latitude  longitude  \
0  19/10/2025 22:00  19/10/2025  22:00:00    CYBB  68.535711 -89.805508   
1  19/10/2025 21:06  19/10/2025  21:06:00    CYBB  68.535711 -89.805508   
2  19/10/2025 21:00  19/10/2025  21:00:00    CYBB  68.535711 -89.805508   
3  19/10/2025 20:00  19/10/2025  20:00:00    CYBB  68.535711 -89.805508   
4  19/10/2025 19:43  19/10/2025  19:43:00    CYBB  68.535711 -89.805508   

   al