In [2]:
import numpy as np
from global_land_mask import globe
from datetime import datetime, timedelta
import random
import time

# --- Configuration Parameters ---
# NOTE: A 0.01 degree step creates 100 points per degree (10,000 points per sq. degree).
# For demonstration, we use a very small, specific ocean bounding box (around the Bahamas)
# to keep the initial calculation time manageable.
MIN_LAT, MAX_LAT = 25.0, 25.5
MIN_LON, MAX_LON = -78.0, -77.5
GRID_STEP = 0.01  # grid spacing in degrees, used for both latitude and longitude

# Reference point used as the "coast" when computing the distance feature (Bahamas area)
COASTAL_LAT, COASTAL_LON = 25.2, -77.8

# Diagonal of the bounding box in degrees. It is an upper bound on the planar
# distance between any two points inside the box, and is derived from the box
# limits so it stays correct if the limits above are changed.
MAX_DIST_IN_BOX = np.sqrt((MAX_LAT - MIN_LAT) ** 2 + (MAX_LON - MIN_LON) ** 2)

# --- Core Utility Function ---

def is_ocean_location(latitude, longitude):
    """
    Report whether the given position(s) lie over the ocean.

    Args:
        latitude: Latitude in degrees; a scalar or an array.
        longitude: Longitude in degrees; a scalar or an array matching latitude.

    Returns:
        bool for scalar input (True = ocean, False = land), or a boolean
        NumPy array with one entry per point for array input.
    """
    # globe.is_land() is True for land, so the ocean answer is its negation.
    # np.logical_not is used instead of `not` because `not` raises on arrays.
    over_ocean = np.logical_not(globe.is_land(latitude, longitude))
    if np.ndim(over_ocean) == 0:
        # Keep the original scalar contract: a plain Python bool.
        return bool(over_ocean)
    return over_ocean

# --- Simulated ML Model Class ---

class SharkPredictionModel:
    """
    Stand-in for a trained model.

    Nothing is loaded from disk; predict() produces a synthetic score from
    the two input features plus random noise.
    """

    def __init__(self):
        print("SharkPredictionModel initialized. Ready for prediction.")

    def predict(self, features):
        """
        Produce one synthetic score per feature row.

        Args:
            features (np.array): 2D array, one row per location. Column 0 is
                the distance to the coastal reference point and column 1 is
                the time of day as a fraction of 24 hours.

        Returns:
            np.array: 1D array of scores, clipped to the range [0.0, 1.0].
        """
        row_count = features.shape[0]
        dist_col, time_col = features[:, 0], features[:, 1]

        # Closer to the reference point -> higher score (1.0 at zero distance).
        proximity_score = 1.0 - (dist_col / MAX_DIST_IN_BOX)

        # Cyclic time term in [0.0, 0.1]; largest when the day fraction is 0.25.
        time_bonus = 0.1 * (np.sin(time_col * 2 * np.pi) + 1.0) / 2.0

        # Uniform jitter in [-0.05, 0.05) so the simulated output is not deterministic.
        jitter = np.random.uniform(-0.05, 0.05, size=row_count)

        return np.clip(proximity_score + time_bonus + jitter, 0.0, 1.0)

# --- Prediction Feature Generator ---

def generate_prediction_features(ocean_coords, timestamp):
    """
    Build the feature matrix passed to model.predict().

    Args:
        ocean_coords (array-like): (lat, lon) pairs with shape (n, 2). A list
            of tuples or a single (lat, lon) pair is also accepted.
        timestamp (datetime): Time to predict for; only its hour and minute
            are used.

    Returns:
        np.array: Float array of shape (n, 2) whose rows are
        [distance_to_coast, time_of_day_fraction]. Empty input yields an
        array of shape (0, 2), so the column layout is the same in every case.
    """
    # Force a float array: this accepts plain lists and prevents integer
    # coordinates from dragging the feature columns to an integer dtype.
    coords = np.asarray(ocean_coords, dtype=float)
    if coords.size == 0:
        return np.empty((0, 2), dtype=float)
    coords = np.atleast_2d(coords)  # a lone (lat, lon) pair becomes one row

    lat_array = coords[:, 0]
    lon_array = coords[:, 1]

    # Feature 1: straight-line distance, in degrees, to the coastal reference point.
    dist_to_coast = np.sqrt((lat_array - COASTAL_LAT)**2 + (lon_array - COASTAL_LON)**2)

    # Feature 2: time of day as a fraction of 24 hours (same value for every row).
    time_of_day_fraction = (timestamp.hour + timestamp.minute / 60.0) / 24.0
    time_column = np.full(lat_array.shape, time_of_day_fraction, dtype=float)

    # Each row: [distance_to_coast, time_of_day_fraction]
    return np.stack([dist_to_coast, time_column], axis=1)


# --- Main Simulation Logic (Updated) ---

def generate_ocean_prediction_list():
    """
    Step 1: Build the static list of ocean coordinates inside the bounding box.

    The grid is laid out at GRID_STEP resolution between the MIN/MAX latitude
    and longitude constants (upper limits excluded, as np.arange does), and
    every land point is filtered out once so later prediction passes can
    skip them.

    Returns:
        list: (lat, lon) tuples for every grid point over the ocean, ordered
        latitude-major (all longitudes for the first latitude, then the next).
    """
    print(f"Defining grid with {GRID_STEP}° resolution over {MIN_LAT}° to {MAX_LAT}° Lat...")

    lat_points = np.arange(MIN_LAT, MAX_LAT, GRID_STEP)
    lon_points = np.arange(MIN_LON, MAX_LON, GRID_STEP)

    total_points = len(lat_points) * len(lon_points)
    print(f"Total grid points to check: {total_points}")

    start_time = time.time()

    # Expand the two axes into one flat list of points. indexing="ij" plus
    # ravel() reproduces the order of a "for lat: for lon:" nested loop.
    lat_grid, lon_grid = np.meshgrid(lat_points, lon_points, indexing="ij")
    flat_lat = lat_grid.ravel()
    flat_lon = lon_grid.ravel()

    # One vectorised land-mask lookup for the whole grid instead of one
    # Python-level call per point.
    ocean_mask = np.logical_not(globe.is_land(flat_lat, flat_lon))

    ocean_coords = list(zip(flat_lat[ocean_mask], flat_lon[ocean_mask]))
    land_points_skipped = total_points - len(ocean_coords)

    end_time = time.time()

    print("\n--- Initial Spatial Filtering Complete ---")
    print(f"Filtering time: {end_time - start_time:.2f} seconds")
    print(f"Total Ocean Prediction Points: {len(ocean_coords)}")
    print(f"Land Points Skipped: {land_points_skipped}")
    print("------------------------------------------\n")

    return ocean_coords

def run_time_series_predictions(ocean_prediction_list, model, num_intervals=3):
    """
    Step 2: Score the pre-filtered ocean points at successive time steps.

    Starting from the current minute, the function advances in 10-minute
    steps, builds the feature matrix for each step, calls model.predict()
    and prints a short report naming the highest-scoring location.

    Args:
        ocean_prediction_list (list): (lat, lon) points located in the ocean.
        model (SharkPredictionModel): Object exposing predict(features).
        num_intervals (int): How many 10-minute steps to simulate.
    """
    base_time = datetime.now().replace(second=0, microsecond=0)

    print(f"Starting prediction simulation for {num_intervals} intervals using model.predict...")

    # One array conversion up front; reused by every time step.
    coords = np.array(ocean_prediction_list)

    for step in range(num_intervals):
        current_time = base_time + timedelta(minutes=step * 10)
        stamp = current_time.strftime('%Y-%m-%d %H:%M:%S')

        # Timed section: feature generation plus the model call.
        tick = time.time()
        X_features = generate_prediction_features(coords, current_time)
        scores = model.predict(X_features) if X_features.size > 0 else np.array([])
        tock = time.time()

        if scores.size == 0:
            print(f"[{stamp}] No ocean points found in bounding box.\n")
            continue

        best = np.argmax(scores)
        top_lat, top_lon = coords[best]
        max_score = scores[best]

        report_lines = [
            f"[{stamp}]",
            f"  Features shape for prediction: {X_features.shape}",
            f"  Predictions generated for {len(scores)} ocean locations.",
            f"  Prediction Time (including feature gen): {tock - tick:.4f} seconds.",
            f"  Highest Risk Location: Lat={top_lat:.4f}, Lon={top_lon:.4f}",
            f"  Max Score: {max_score:.4f}\n",
        ]
        print("\n".join(report_lines))


# Entry point: build the model, filter the grid once, then run three prediction steps.
if __name__ == "__main__":
    
    # Instantiate the simulated machine learning model
    shark_model = SharkPredictionModel()
    
    # 1. Generate the static list of coordinates that require prediction
    #    (land points are removed here so they are never scored)
    ocean_points_to_predict = generate_ocean_prediction_list()
    
    # 2. Run the time series prediction, passing the model
    run_time_series_predictions(ocean_points_to_predict, shark_model, num_intervals=3)


SharkPredictionModel initialized. Ready for prediction.
Defining grid with 0.01° resolution over 25.0° to 25.5° Lat...
Total grid points to check: 2500

--- Initial Spatial Filtering Complete ---
Filtering time: 0.39 seconds
Total Ocean Prediction Points: 2398
Land Points Skipped: 102
------------------------------------------

Starting prediction simulation for 3 intervals using model.predict...
[2025-10-05 01:07:00]
  Features shape for prediction: (2398, 2)
  Predictions generated for 2398 ocean locations.
  Prediction Time (including feature gen): 0.0303 seconds.
  Highest Risk Location: Lat=25.1300, Lon=-77.7900
  Max Score: 1.0000

[2025-10-05 01:17:00]
  Features shape for prediction: (2398, 2)
  Predictions generated for 2398 ocean locations.
  Prediction Time (including feature gen): 0.0002 seconds.
  Highest Risk Location: Lat=25.1300, Lon=-77.7900
  Max Score: 1.0000

[2025-10-05 01:27:00]
  Features shape for prediction: (2398, 2)
  Predictions generated for 2398 ocean loca

In [2]:
# The scientific name for the Whale Shark is Rhincodon typus.

def get_rhincodon_typus_range():
    """
    Return approximate geographic ranges for the Whale Shark (Rhincodon typus).

    The entries are broad, human-readable descriptions of regions in tropical
    and warm temperate waters (roughly 30°N to 35°S). They are coarse
    boundaries, not precise habitat points, and the bounds are stored as
    text rather than numbers.

    Returns:
        list: One dictionary per region with the keys "Region",
              "Latitude_Range", "Longitude_Range" and "Notes". A new list
              is built on every call.
    """
    field_names = ("Region", "Latitude_Range", "Longitude_Range", "Notes")

    # One tuple per region, in the same order as field_names.
    region_rows = (
        (
            "Global Tropical/Warm Temperate Zone (Broad Est.)",
            "Approx. 30° N to 35° S",
            "Global (excluding extreme polar and deep-sea)",
            "This is the general, global belt of distribution.",
        ),
        (
            "Western Atlantic/Caribbean Sea/Gulf of Mexico",
            "Approx. 5° N to 30° N",
            "Approx. 50° W to 100° W",
            "Includes areas like the Yucatan Peninsula, Florida, and Belize.",
        ),
        (
            "Eastern Pacific Ocean (West Coast Americas)",
            "Approx. 10° S to 30° N",
            "Approx. 75° W to 120° W",
            "Includes areas like the Galapagos Islands and Sea of Cortez.",
        ),
        (
            "Indian Ocean (Including Red Sea/Arabian Sea)",
            "Approx. 30° S to 30° N",
            "Approx. 35° E to 110° E",
            "Includes areas like the Maldives, Mozambique, and Djibouti.",
        ),
        (
            "Western Pacific Ocean/Southeast Asia/Oceania",
            "Approx. 30° S to 30° N",
            "Approx. 110° E to 180° E",
            "Includes areas like the Philippines, Indonesia, and Australia's Ningaloo Reef.",
        ),
    )

    return [dict(zip(field_names, row)) for row in region_rows]

# Example of how to use the function:
# Example of how to use the function: print every region with its bounds.
if __name__ == "__main__":
    possible_areas = get_rhincodon_typus_range()

    print("--- Rhincodon typus (Whale Shark) Approximate Global Range ---")
    for area in possible_areas:
        print(f"\nRegion: {area['Region']}")
        print(f"  Latitude: {area['Latitude_Range']}")
        print(f"  Longitude: {area['Longitude_Range']}")
        # Skip the notes line when the entry has an empty note.
        if area['Notes']:
             print(f"  Notes: {area['Notes']}")

--- Rhincodon typus (Whale Shark) Approximate Global Range ---

Region: Global Tropical/Warm Temperate Zone (Broad Est.)
  Latitude: Approx. 30° N to 35° S
  Longitude: Global (excluding extreme polar and deep-sea)
  Notes: This is the general, global belt of distribution.

Region: Western Atlantic/Caribbean Sea/Gulf of Mexico
  Latitude: Approx. 5° N to 30° N
  Longitude: Approx. 50° W to 100° W
  Notes: Includes areas like the Yucatan Peninsula, Florida, and Belize.

Region: Eastern Pacific Ocean (West Coast Americas)
  Latitude: Approx. 10° S to 30° N
  Longitude: Approx. 75° W to 120° W
  Notes: Includes areas like the Galapagos Islands and Sea of Cortez.

Region: Indian Ocean (Including Red Sea/Arabian Sea)
  Latitude: Approx. 30° S to 30° N
  Longitude: Approx. 35° E to 110° E
  Notes: Includes areas like the Maldives, Mozambique, and Djibouti.

Region: Western Pacific Ocean/Southeast Asia/Oceania
  Latitude: Approx. 30° S to 30° N
  Longitude: Approx. 110° E to 180° E
  Notes: I

### test cases

In [3]:
import joblib
import pandas as pd

# --- Load your real model ---
# Change to your actual path (absolute or relative)
# NOTE(review): joblib.load unpickles the file, and unpickling can execute
# arbitrary code; only load model files that come from a trusted source.
model = joblib.load("./presence_model.pkl")

# --- Define 25 Test Cases (15 original + 10 new) ---
# Each dict holds the model's input columns plus a descriptive "case_name".
# The "(Expected N)" suffix in case_name is only a label; the value actually
# compared against the prediction lives in expected_outputs below, so the two
# must be kept in sync by hand.
# NOTE(review): some coordinates may not match their case names; confirm
# against a map before relying on these labels:
#   - "North America Landmass" (25, -95) looks like open Gulf of Mexico water.
#   - "Sea of Cortez" (30, -120) looks like it lies west of Baja California.
#   - "Northern Australia Coastal" (-25, 135) and "Brazilian Coast" (-15, -45)
#     look like inland positions.
test_cases = [
    # Original 15
    {"decimalLatitude": 20.0, "decimalLongitude": -80.0, "month": 6, "bathymetry": 500, "sst": 28.5, "sss": 35.0, "shoredistance": 150, "case_name": "Caribbean Habitat (Expected 1)"},
    {"decimalLatitude": 0.0, "decimalLongitude": -15.0, "month": 12, "bathymetry": 4000, "sst": 27.0, "sss": 34.5, "shoredistance": 2500, "case_name": "Central Atlantic (Expected 0)"},
    {"decimalLatitude": 50.0, "decimalLongitude": 0.0, "month": 3, "bathymetry": 1000, "sst": 15.0, "sss": 33.0, "shoredistance": 500, "case_name": "Too Far North (Expected 0)"},
    {"decimalLatitude": -70.0, "decimalLongitude": 100.0, "month": 8, "bathymetry": 4500, "sst": 5.0, "sss": 32.0, "shoredistance": 1000, "case_name": "Too Far South (Expected 0)"},
    {"decimalLatitude": 25.0, "decimalLongitude": -95.0, "month": 7, "bathymetry": 100, "sst": 29.0, "sss": 36.0, "shoredistance": 10, "case_name": "North America Landmass (Expected 0)"},
    {"decimalLatitude": 25.0, "decimalLongitude": 10.0, "month": 9, "bathymetry": 50, "sst": 30.0, "sss": 38.0, "shoredistance": 5, "case_name": "Afro-Eurasia Landmass (Expected 0)"},
    {"decimalLatitude": 30.0, "decimalLongitude": -150.0, "month": 5, "bathymetry": 3500, "sst": 20.0, "sss": 34.0, "shoredistance": 1000, "case_name": "North Pacific Cold Water (Expected 0)"},
    {"decimalLatitude": 30.0, "decimalLongitude": -120.0, "month": 5, "bathymetry": 100, "sst": 22.0, "sss": 35.0, "shoredistance": 50, "case_name": "Sea of Cortez (Expected 1)"},
    {"decimalLatitude": -35.0, "decimalLongitude": -10.0, "month": 1, "bathymetry": 200, "sst": 20.0, "sss": 34.5, "shoredistance": 500, "case_name": "South Atlantic Edge (Expected 1)"},
    {"decimalLatitude": -35.0, "decimalLongitude": -40.0, "month": 1, "bathymetry": 3500, "sst": 18.0, "sss": 34.0, "shoredistance": 1500, "case_name": "South Atlantic Cold Exclusion (Expected 0)"},
    {"decimalLatitude": 10.0, "decimalLongitude": -125.0, "month": 4, "bathymetry": 4500, "sst": 26.5, "sss": 33.5, "shoredistance": 1500, "case_name": "Deep Eastern Pacific Exclusion (Expected 0)"},
    {"decimalLatitude": 10.0, "decimalLongitude": 175.0, "month": 11, "bathymetry": 5000, "sst": 28.0, "sss": 35.5, "shoredistance": 3000, "case_name": "Central Pacific Exclusion (Expected 0)"},
    {"decimalLatitude": -20.0, "decimalLongitude": 15.0, "month": 2, "bathymetry": 100, "sst": 26.0, "sss": 35.0, "shoredistance": 20, "case_name": "Coastal South Africa (Expected 1)"},
    {"decimalLatitude": 15.0, "decimalLongitude": 120.0, "month": 4, "bathymetry": 50, "sst": 29.0, "sss": 34.0, "shoredistance": 10, "case_name": "Philippines Feeding Ground (Expected 1)"},
    {"decimalLatitude": -10.0, "decimalLongitude": -100.0, "month": 8, "bathymetry": 1000, "sst": 24.0, "sss": 34.8, "shoredistance": 500, "case_name": "Mid-Range Ocean (Expected 1)"},

    # New 10
    {"decimalLatitude": 5.0, "decimalLongitude": 80.0, "month": 5, "bathymetry": 200, "sst": 29.5, "sss": 34.8, "shoredistance": 30, "case_name": "Sri Lanka Coastal (Expected 1)"},
    {"decimalLatitude": -25.0, "decimalLongitude": 135.0, "month": 1, "bathymetry": 150, "sst": 27.0, "sss": 35.0, "shoredistance": 20, "case_name": "Northern Australia Coastal (Expected 1)"},
    {"decimalLatitude": 28.0, "decimalLongitude": 140.0, "month": 7, "bathymetry": 5000, "sst": 25.0, "sss": 34.0, "shoredistance": 2000, "case_name": "Open Pacific (Expected 0)"},
    {"decimalLatitude": -15.0, "decimalLongitude": -45.0, "month": 11, "bathymetry": 400, "sst": 26.0, "sss": 35.0, "shoredistance": 60, "case_name": "Brazilian Coast (Expected 1)"},
    {"decimalLatitude": -5.0, "decimalLongitude": 150.0, "month": 3, "bathymetry": 250, "sst": 28.5, "sss": 35.5, "shoredistance": 40, "case_name": "Papua New Guinea (Expected 1)"},
    {"decimalLatitude": 32.0, "decimalLongitude": -160.0, "month": 6, "bathymetry": 4000, "sst": 19.0, "sss": 34.0, "shoredistance": 1800, "case_name": "North Pacific Too Cold (Expected 0)"},
    {"decimalLatitude": -32.0, "decimalLongitude": 25.0, "month": 4, "bathymetry": 300, "sst": 21.0, "sss": 35.0, "shoredistance": 50, "case_name": "South Africa Edge (Expected 1)"},
    {"decimalLatitude": 12.0, "decimalLongitude": 40.0, "month": 9, "bathymetry": 150, "sst": 30.0, "sss": 36.0, "shoredistance": 20, "case_name": "Red Sea (Expected 1)"},
    {"decimalLatitude": -2.0, "decimalLongitude": -30.0, "month": 10, "bathymetry": 3000, "sst": 27.5, "sss": 34.7, "shoredistance": 1000, "case_name": "Equatorial Atlantic Open (Expected 0)"},
    {"decimalLatitude": 18.0, "decimalLongitude": -60.0, "month": 6, "bathymetry": 200, "sst": 28.0, "sss": 35.0, "shoredistance": 80, "case_name": "Eastern Caribbean (Expected 1)"}
]

# --- Expected outputs (25 values, 1 = habitat, 0 = unlikely) ---
# Positional: entry i is the expected label for test_cases[i].
expected_outputs = [
    1,0,0,0,0,0,0,1,1,0,0,0,1,1,1,   # original 15
    1,1,0,1,1,0,1,1,0,1               # new 10
]

# --- Build the results table, and a feature-only copy for the model ---
# case_name is a label, not a model input, so it is dropped from df_test.
df_results = pd.DataFrame(test_cases)
df_test = df_results.drop(columns=["case_name"])

# --- Score every test case ---
predictions = model.predict(df_test)

# --- Attach predictions and expectations, then flag the matches ---
df_results["prediction"] = predictions
df_results["expected"] = expected_outputs
df_results["result"] = df_results["prediction"].eq(df_results["expected"])

# --- Per-case report followed by the pass count ---
summary_columns = ["case_name", "expected", "prediction", "result"]
print(df_results[summary_columns])
passed_count = df_results['result'].sum()
print(f"\nTest Summary: {passed_count}/{len(df_results)} tests passed.")


                                      case_name  expected  prediction  result
0                Caribbean Habitat (Expected 1)         1           1    True
1                 Central Atlantic (Expected 0)         0           0    True
2                    Too Far North (Expected 0)         0           0    True
3                    Too Far South (Expected 0)         0           0    True
4           North America Landmass (Expected 0)         0           1   False
5            Afro-Eurasia Landmass (Expected 0)         0           1   False
6         North Pacific Cold Water (Expected 0)         0           0    True
7                    Sea of Cortez (Expected 1)         1           1    True
8              South Atlantic Edge (Expected 1)         1           1    True
9    South Atlantic Cold Exclusion (Expected 0)         0           0    True
10  Deep Eastern Pacific Exclusion (Expected 0)         0           0    True
11       Central Pacific Exclusion (Expected 0)         0       