In [1]:
import pandas as pd
import numpy as np
from scipy.interpolate import interp1d
from haversine import haversine, Unit
from datetime import datetime, timedelta
import os
import webbrowser
import warnings

# --- 1. SETUP: Try to import Folium for maps ---
try:
    import folium
    FOLIUM_INSTALLED = True
except ImportError:
    print("--- WARNING: 'folium' not installed. Map visualization will be disabled. ---")
    print("To install it, run: pip install folium")
    FOLIUM_INSTALLED = False

# Suppress warnings from interpolation, which is fine
warnings.filterwarnings('ignore', category=UserWarning)

# --- 2. THE "BRAIN": IDW Model Functions ---

# We predict a single 'pressure' column, as requested
VARIABLES_TO_PREDICT = [
    'temperature', 
    'pressure',           # <-- ONE pressure column
    'wind_direction', 
    'wind_speed', 
    'visibility', 
    'ceiling_ft_asl'      # This will be nan at high alt, which is correct
]

def find_k_nearest_stations(query_lat, query_lon, df_aero, k=5):
    """Finds the k-nearest stations to a query point."""
    query_point = (query_lat, query_lon)
    station_points = list(zip(df_aero['latitude'], df_aero['longitude']))
    # Calculate distance from query point to all stations
    df_aero['distance_km'] = [haversine(query_point, pt, unit=Unit.KILOMETERS) for pt in station_points]
    return df_aero.sort_values(by='distance_km').head(k)

def get_virtual_weather_for_station(station_name, query_timestamp, query_alt, df_master_station):
    """
    Performs a 2-step interpolation (altitude, then time) for one station.
    This is the core logic of the model.
    """
    final_predictions = {}
    for var in VARIABLES_TO_PREDICT:
        # 1. Get all valid data for this variable for this station
        df_var = df_master_station.dropna(subset=[var, 'timestamp', 'altitude_ft_asl'])
        
        # 2. Need at least 4 points to do a 2D interpolation (2 points at 2 times)
        if len(df_var) < 4: 
            final_predictions[var] = np.nan
            continue
        
        # 3. Step A: Altitude Interpolation
        # For each time we have data, create an altitude interpolator
        time_groups = df_var.groupby('timestamp')
        alt_interp_data = []
        for time, group in time_groups:
            # Need at least 2 altitudes at this time to draw a line
            group = group.sort_values(by='altitude_ft_asl').drop_duplicates(subset=['altitude_ft_asl'])
            if len(group) < 2: continue
            
            # Create the 1D altitude interpolation function
            alt_interp_func = interp1d(
                group['altitude_ft_asl'], group[var], kind='linear', fill_value="extrapolate"
            )
            # Find the "virtual" weather at our query altitude
            alt_interp_data.append({'timestamp': time, 'value': alt_interp_func(query_alt)})
            
        # 4. Step B: Time Interpolation
        # Now we interpolate our "virtual" altitude points over time
        if len(alt_interp_data) < 2:
            final_predictions[var] = np.nan
            continue
            
        df_time_interp = pd.DataFrame(alt_interp_data).sort_values(by='timestamp').drop_duplicates(subset=['timestamp'])
        
        # Create the 1D time interpolation function
        time_interp_func = interp1d(
            df_time_interp['timestamp'], df_time_interp['value'], kind='linear', fill_value="extrapolate"
        )
        # Find the final "virtual" weather at our query time
        final_predictions[var] = time_interp_func(query_timestamp)
        
    return final_predictions

def predict_idw(query_date, query_time, query_lat, query_lon, query_alt, df_master, df_aero, k=5):
    """Main prediction function that ties everything together."""
    try:
        # Convert human-readable time to a number for interpolation
        query_dt = pd.to_datetime(f"{query_date} {query_time}")
        query_timestamp = query_dt.value // 10**9
    except Exception as e:
        print(f"  > Error: Invalid date or time format. Use 'YYYY-MM-DD' and 'HH:MM:SS'.")
        return None, None
        
    # Validate geographic coordinates
    if not (-90 <= query_lat <= 90):
        print(f"  > Error: Invalid Latitude '{query_lat}'. Must be between -90 and 90.")
        return None, None
    if not (-180 <= query_lon <= 180):
        print(f"  > Error: Invalid Longitude '{query_lon}'. Must be between -180 and 180.")
        return None, None
        
    df_aero_copy = df_aero.copy()
    try:
        # 1. Find 5 nearest stations
        nearest_stations = find_k_nearest_stations(query_lat, query_lon, df_aero_copy, k)
    except ValueError as e:
        print(f"  > Error in coordinate calculation: {e}")
        return None, None
        
    station_predictions = []
    # 2. Get a "virtual weather" report from each station
    for _, station in nearest_stations.iterrows():
        station_name = station['station']
        distance = station['distance_km']
        
        # Get all data for this one station
        df_master_station = df_master[df_master['station'] == station_name]
        if len(df_master_station) == 0: continue
        
        # Run the 2-step interpolation
        virtual_weather = get_virtual_weather_for_station(
            station_name, query_timestamp, query_alt, df_master_station
        )
        
        # Calculate weight (handle division by zero if we are at the station)
        if distance < 0.1: distance = 0.1
        weight = 1 / (distance ** 2)
        station_predictions.append({'weather': virtual_weather, 'weight': weight})
        
    if not station_predictions:
        return None, nearest_stations
        
    # 3. Calculate the final weighted average
    final_weighted_preds = {}
    for var in VARIABLES_TO_PREDICT:
        numerator = 0
        denominator = 0
        for p in station_predictions:
            val = p['weather'][var]
            if not np.isnan(val): # Only use valid numbers
                numerator += p['weight'] * val
                denominator += p['weight']
                
        if denominator == 0:
            final_weighted_preds[var] = np.nan
        else:
            final_weighted_preds[var] = numerator / denominator
            
    return final_weighted_preds, nearest_stations

# --- 3. THE "VISUALIZER": Map Function ---

def format_prediction_html(pred_dict):
    """Helper function to create clean HTML for map popups."""
    if not pred_dict:
        return "No prediction data available."
        
    html = ""
    # Use .get() to safely access keys, providing 'nan' as a default
    temp = pred_dict.get('temperature', np.nan)
    pres = pred_dict.get('pressure', np.nan)
    wdir = pred_dict.get('wind_direction', np.nan)
    wspd = pred_dict.get('wind_speed', np.nan)
    vis = pred_dict.get('visibility', np.nan)
    ceil = pred_dict.get('ceiling_ft_asl', np.nan)

    # We now show 'nan' if the value is missing, which is clearer
    html += f"Temperature: {temp:.2f} C<br>"
    html += f"Pressure: {pres:.2f} hPa<br>"
    html += f"Wind Direction: {wdir:.2f} deg<br>"
    html += f"Wind Speed: {wspd:.2f} kts<br>"
    html += f"Visibility: {vis:.2f} sm<br>"
    
    # Only show ceiling if it's a valid number (i.e., at the surface)
    if not np.isnan(ceil):
         html += f"Cloud Ceiling: {ceil:.0f} ft ASL<br>"
         
    return html

def create_and_open_map(points_for_map, stations_for_map):
    """Creates an interactive folium map and opens it in the browser."""
    if not FOLIUM_INSTALLED:
        print("\nFolium not installed. Skipping map generation.")
        return
    print("\nGenerating visual map...")
    
    map_center = [points_for_map[0]['lat'], points_for_map[0]['lon']]
    m = folium.Map(location=map_center, zoom_start=4) # Zoom out a bit for routes

    # --- THIS IS THE FIX ---
    # Add gray markers for all nearby stations (simple popup)
    for station in stations_for_map:
        popup_html = f"<b>Station: {station['name']}</b>"
        popup_html += f"<br>({station['lat']:.2f}, {station['lon']:.2f}) @ {station['alt']:.0f} ft"
        
        
        folium.Marker(
            location=[station['lat'], station['lon']],
            popup=folium.Popup(popup_html, max_width=300),
            icon=folium.Icon(color='gray', icon='plane', prefix='fa')
        ).add_to(m)

    # B. Add blue markers for the main query point(s)
    route_coords = []
    for point in points_for_map:
        route_coords.append([point['lat'], point['lon']])
        popup_html = f"<b>{point['name']}</b><br>({point['lat']:.2f}, {point['lon']:.2f}) @ {point['alt']:.0f} ft<hr>"
        popup_html += format_prediction_html(point['prediction'])
            
        folium.Marker(
            location=[point['lat'], point['lon']],
            popup=folium.Popup(popup_html, max_width=300),
            icon=folium.Icon(color='blue', icon='info-sign')
        ).add_to(m)

    # C. If it's a route (more than 1 point), draw the line
    if len(route_coords) > 1:
        folium.PolyLine(locations=route_coords, color='blue', weight=3, opacity=0.8).add_to(m)

    # D. Save and Open the map
    filepath = os.path.abspath('weather_map.html')
    m.save(filepath)
    print(f"Visual map saved to {filepath}")
    
    # Automatically open in browser
    webbrowser.open('file://' + filepath)
    print("Opening map in your web browser...")

# --- 4. THE "FACE": Helper Functions & Main Application ---

def get_station_info(station_code, station_db):
    """Gets Lat, Lon, and Alt for a station code."""
    try:
        station_info = station_db.loc[station_code.upper()]
        return station_info['latitude'], station_info['longitude'], station_info['elevation_ft']
    except KeyError:
        return None, None, None

def interpolate_point(start, end, fraction):
    """Linearly interpolates between two values."""
    return start + (end - start) * fraction

def main():
    print("Loading and preparing weather database... (This may take a moment)")
    
    try:
        df_aero = pd.read_csv('aerodromes.csv')
        df_master = pd.read_csv('master_dataset_final.csv')
    except FileNotFoundError as e:
        print(f"--- ERROR: File not found: {e.filename} ---")
        print("Please make sure 'aerodromes.csv' and 'master_dataset_final.csv' are in the same directory.")
        print("You must run the 'STEP_1_Create_Master_Dataset.py' script first.")
        return

    # --- Preprocessing ---
    # This happens once at the start for speed
    print("Preprocessing data (converting time)...")
    df_master['datetime'] = pd.to_datetime(df_master['date'] + ' ' + df_master['time'])
    df_master['timestamp'] = df_master['datetime'].astype(np.int64) // 10**9
    df_master = df_master.sort_values(by=['station', 'timestamp', 'altitude_ft_asl'])
    station_db = df_aero.set_index('station')
    
    print(" Weather Database is Ready.")
    print("\n--- PILOT'S FLIGHT PREDICTOR (FINAL) ---")
    
    while True:
        print("\nSelect Operation:")
        print("  [A] Flight Route Briefing")
        print("  [B] Unknown Point ")
        print("  [Q] Quit")
        choice = input("Your choice: ").strip().upper()

        if choice == 'Q':
            print("Safe flying. Goodbye.")
            break

        # --- [A] FLIGHT ROUTE BRIEFING ---
        elif choice == 'A':
            try:
                print("\n--- Flight Route Briefing ---")
                start_code = input("Enter Start Station (e.g., CYVR): ").upper()
                end_code = input("Enter End Station (e.g., CYYZ): ").upper()
                
                lat1, lon1, alt1 = get_station_info(start_code, station_db)
                lat2, lon2, alt2 = get_station_info(end_code, station_db)
                
                if lat1 is None or lat2 is None:
                    print("  > Error: One or both stations not found.")
                    continue
                
                q_dep_date_str = input("Enter Departure Date (YYYY-MM-DD): ")
                q_dep_time_str = input("Enter Departure Time (HH:MM:SS): ")
                
                q_cruise_alt = float(input("Enter Cruise Altitude (feet, range of(0 - 53000)): "))
                q_cruise_spd = float(input("Enter Cruise Speed (knots, range of (10 - 450)): "))
                
                dep_date = datetime.strptime(q_dep_date_str, '%Y-%m-%d').date()
                dep_time = datetime.strptime(q_dep_time_str, '%H:%M:%S').time()
                dep_dt = datetime.combine(dep_date, dep_time)
                
                total_dist_km = haversine((lat1, lon1), (lat2, lon2), unit=Unit.KILOMETERS)
                total_dist_nm = total_dist_km * 0.539957
                flight_time_hours = total_dist_nm / q_cruise_spd
                
                print(f"\n...Calculating Route ({total_dist_nm:.0f} nm, approx {flight_time_hours:.1f} hours)...")

                waypoints = [
                    ("Departure", 0.0, alt1),
                    ("Climb (25%)", 0.25, q_cruise_alt),
                    ("Cruise (50%)", 0.50, q_cruise_alt),
                    ("Descent (75%)", 0.75, q_cruise_alt),
                    ("Arrival", 1.0, alt2)
                ]
                
                print(f"\n--- FLIGHT BRIEFING FOR {start_code} -> {end_code} ---")
                
                points_for_map = []
                all_nearest_stations = {} # To store unique stations

                for (name, fraction, alt) in waypoints:
                    wp_lat = interpolate_point(lat1, lat2, fraction)
                    wp_lon = interpolate_point(lon1, lon2, fraction)
                    wp_time_dt = dep_dt + timedelta(hours=(flight_time_hours * fraction))
                    wp_date = wp_time_dt.strftime('%Y-%m-%d')
                    wp_time = wp_time_dt.strftime('%H:%M:%S')
                    
                    print(f"\n--- {name} @ {wp_time} ---")
                    print(f"  > Coords: ({wp_lat:.2f}, {wp_lon:.2f}) @ {alt:.0f} ft")
                    
                    pred, stations = predict_idw(wp_date, wp_time, wp_lat, wp_lon, alt, df_master, df_aero)
                    
                    points_for_map.append({
                        'lat': wp_lat, 'lon': wp_lon, 'alt': alt, 
                        'name': name, 'prediction': pred
                    })

                    if pred:
                        print(f"  > Temperature: {pred.get('temperature', np.nan):.2f} C")
                        print(f"  > Pressure: {pred.get('pressure', np.nan):.2f} hPa")
                        print(f"  > Wind Direction: {pred.get('wind_direction', np.nan):.2f} deg")
                        print(f"  > Wind Speed: {pred.get('wind_speed', np.nan):.2f} kts")
                        print(f"  > Visibility: {pred.get('visibility', np.nan):.2f} sm")
                        if not np.isnan(pred.get('ceiling_ft_asl', np.nan)):
                            print(f"  > Cloud Ceiling: {pred['ceiling_ft_asl']:.0f} ft ASL")
                    else:
                        print("  > No prediction data available for this point.")
                    
                    if stations is not None:
                        for _, s in stations.iterrows():
                            all_nearest_stations[s['station']] = 1 # Just store the name
                
                # --- THIS IS THE FIX ---
                # This is much faster. It just formats the station info we already have.
                stations_for_map = []
                for station_name in all_nearest_stations.keys():
                    s_lat, s_lon, s_alt = get_station_info(station_name, station_db)
                    if s_lat is None: continue
                    stations_for_map.append({
                        'lat': s_lat, 'lon': s_lon, 'alt': s_alt,
                        'name': station_name
                        # We no longer add a 'prediction' here
                    })
                
                create_and_open_map(points_for_map, stations_for_map)

            except ValueError:
                print("  > Error: Invalid number for altitude/speed or incorrect date/time format.")
            except Exception as e:
                print(f"  > An unexpected error occurred: {e}")

        # --- [B] UNKNOWN POINT ---
        elif choice == 'B':
            try:
                print("\n--- Unknown Point  ---")
                q_date = input("Enter Date (YYYY-MM-DD): ")
                q_time = input("Enter Time (HH:MM:SS, 24h): ")
                q_lat = float(input("Enter Latitude (e.g., 49.5): "))
                q_lon = float(input("Enter Longitude (e.g., -120.0): "))
                q_alt = float(input("Enter Altitude (in feet ASL): "))
                
                print("\n...Calculating...")
                
                pred, stations = predict_idw(q_date, q_time, q_lat, q_lon, q_alt, df_master, df_aero)
                
                if pred:
                    print(f"\n--- Predicted Weather at ({q_lat}, {q_lon}) @ {q_alt} ft ---")
                    print(f"  > Temperature: {pred.get('temperature', np.nan):.2f} C")
                    # --- THIS IS THE FIX ---
                    print(f"  > Pressure: {pred.get('pressure', np.nan):.2f} hPa")
                    print(f"  > Wind Direction: {pred.get('wind_direction', np.nan):.2f} deg")
                    print(f"  > Wind Speed: {pred.get('wind_speed', np.nan):.2f} kts")
                    print(f"  > Visibility: {pred.get('visibility', np.nan):.2f} sm")
                    # -----------------------
                    if not np.isnan(pred.get('ceiling_ft_asl', np.nan)):
                        print(f"  > Cloud Ceiling: {pred['ceiling_ft_asl']:.0f} ft ASL")

                points_for_map = [{
                    'lat': q_lat, 'lon': q_lon, 'alt': q_alt,
                    'name': 'Unknown Point', 'prediction': pred
                }]
                
                # --- THIS IS THE FIX ---
                # This is much faster. It just formats the station info.
                stations_for_map = []
                if stations is not None:
                    for _, s in stations.iterrows():
                        s_lat, s_lon, s_alt = get_station_info(s['station'], station_db)
                        if s_lat is None: continue
                        stations_for_map.append({
                            'lat': s_lat, 'lon': s_lon, 'alt': s_alt,
                            'name': s['station']
                            # We no longer add a 'prediction' here
                        })
                        
                create_and_open_map(points_for_map, stations_for_map)
            
            except ValueError:
                print("  > Error: Invalid input. Please enter numbers for lat/lon/alt.")

        else:
            print("Invalid choice. Please enter 'A', 'B', or 'Q'.")

# --- 5. RUN THE APPLICATION ---
if __name__ == "__main__":
    main()

Loading and preparing weather database... (This may take a moment)
Preprocessing data (converting time)...
 Weather Database is Ready.

--- PILOT'S FLIGHT PREDICTOR (FINAL) ---

Select Operation:
  [A] Flight Route Briefing
  [B] Unknown Point 
  [Q] Quit

--- Flight Route Briefing ---

...Calculating Route (1806 nm, approx 7.2 hours)...

--- FLIGHT BRIEFING FOR CYVR -> CYYZ ---

--- Departure @ 11:00:00 ---
  > Coords: (49.19, -123.18) @ 14 ft
  > Temperature: 11.45 C
  > Pressure: 1022.25 hPa
  > Wind Direction: 178.08 deg
  > Wind Speed: 0.23 kts
  > Visibility: 21.40 sm

--- Climb (25%) @ 12:48:23 ---
  > Coords: (47.81, -112.30) @ 40000 ft
  > Temperature: -51.42 C
  > Pressure: 189.04 hPa
  > Wind Direction: 317.44 deg
  > Wind Speed: 35.02 kts
  > Visibility: 56.83 sm

--- Cruise (50%) @ 14:36:46 ---
  > Coords: (46.44, -101.41) @ 40000 ft
  > Temperature: -49.34 C
  > Pressure: 187.40 hPa
  > Wind Direction: 246.51 deg
  > Wind Speed: 14.31 kts
  > Visibility: 58.10 sm

--- Des