### Creating Virtual Bouy

In [1]:
import xarray as xr
import numpy as np

# Load dataset
ds = xr.open_dataset("surf_data_2020.nc")

# Arugam Bay coordinates
spot_lat, spot_lon = 6.8399, 81.8396

# Select nearest grid point
nearest = ds.sel(latitude=spot_lat, longitude=spot_lon, method="nearest")

# Check if it's ocean (non-NaN swell)
if np.isnan(nearest["shts"].isel(valid_time=0)):
    print("Nearest point on land. Searching for closest ocean point...")

    # Extract first timestep of swell data
    shts_data = ds["shts"].isel(valid_time=0).stack(point=("latitude", "longitude")).dropna("point")

    # Compute Haversine distance
    R = 6371  # Earth radius (km)
    lat1, lon1 = np.radians(spot_lat), np.radians(spot_lon)
    lat2, lon2 = np.radians(shts_data.latitude), np.radians(shts_data.longitude)
    d = 2 * R * np.arcsin(np.sqrt(
        np.sin((lat2 - lat1) / 2) ** 2 +
        np.cos(lat1) * np.cos(lat2) * np.sin((lon2 - lon1) / 2) ** 2
    ))

    closest = shts_data.isel(point=d.argmin())
    target_lat, target_lon = float(closest.latitude), float(closest.longitude)
else:
    print("Found valid offshore point.")
    target_lat, target_lon = float(nearest.latitude), float(nearest.longitude)

print(f"Virtual buoy for Arugam Bay: ({target_lat:.2f}, {target_lon:.2f})")

# Extract target gridpoint data (ConvLSTM input)
features = ["u10", "v10", "msl", "tp", "shts", "mpts", "mdts"]
target_data = ds[features].sel(latitude=target_lat, longitude=target_lon) # bouy-specific data

Nearest point on land. Searching for closest ocean point...
Virtual buoy for Arugam Bay: (7.00, 82.00)


In [2]:
from sklearn.preprocessing import StandardScaler

# --- Extract features and targets ---
X = ds[features].to_array(dim="channel").transpose("valid_time", "latitude", "longitude", "channel")
target_vars = ["shts", "mpts", "mdts", "u10", "v10"]
Y = np.stack([target_data[var].values for var in target_vars], axis=1)

# --- Clean NaNs/Infs ---
X_values = np.nan_to_num(X.values, nan=0.0, posinf=0.0, neginf=0.0)
Y = np.nan_to_num(Y, nan=0.0, posinf=0.0, neginf=0.0)

# --- Variable-wise normalization (feature scaling per channel) ---
scalers = [StandardScaler().fit(X_values[..., i].reshape(-1, 1)) for i in range(X_values.shape[-1])]
X_scaled = np.stack([scalers[i].transform(X_values[..., i].reshape(-1, 1)).reshape(X_values[..., i].shape)
                     for i in range(X_values.shape[-1])], axis=-1)

# --- Encode mdts (direction) as sine & cosine ---
mdts_rad = np.deg2rad(Y[:, 2])
Y = np.column_stack([Y[:, 0], Y[:, 1], np.sin(mdts_rad), np.cos(mdts_rad), Y[:, 3], Y[:, 4]])

# --- Normalize Y (each column separately) ---
Y_scaler = StandardScaler().fit(Y)
Y_scaled = Y_scaler.transform(Y)

# --- Create sliding windows ---
def create_sequences(X, Y, lookback, lookahead):
    X_out, Y_out = [], []
    for i in range(len(X) - lookback - lookahead + 1):
        X_out.append(X[i:i+lookback])
        Y_out.append(Y[i + lookback + lookahead - 1])
    return np.array(X_out), np.array(Y_out)

LOOKBACK, LOOKAHEAD = 16, 1
X_train, Y_train = create_sequences(X_scaled, Y_scaled, lookback=LOOKBACK, lookahead=LOOKAHEAD)

print(f"X_train shape: {X_train.shape}")
print(f"Y_train shape: {Y_train.shape}")


X_train shape: (1448, 16, 21, 21, 7)
Y_train shape: (1448, 6)


In [3]:
# Step 1: Check for NaNs/Infs
print(np.isnan(X_train).any(), np.isinf(X_train).any())
print(np.isnan(Y_train).any(), np.isinf(Y_train).any())

# Step 2: Check value ranges
print("X range:", X_train.min(), X_train.max())
print("Y range:", Y_train.min(), Y_train.max())

False False
False False
X range: -4.5366464 51.446213
Y range: -3.4284742 4.3136654


In [4]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import ConvLSTM2D, Flatten, Dense, BatchNormalization

# Get the input shape from your training data
# (lookback, height, width, channels)
input_shape = X_train.shape[1:] 
output_shape = Y_train.shape[1] # 5

model = Sequential([
    # This layer reads the "movie" (8 frames of 21x21x6)
    ConvLSTM2D(
        filters=32,
        kernel_size=(3, 3),
        padding='same',
        return_sequences=False, # Only output the last time step
        input_shape=input_shape
    ),
    BatchNormalization(),
    
    # Flatten the final map into a 1D vector
    Flatten(),
    
    # Dense layers to interpret the features
    Dense(64, activation='relu'),
    
    # Output layer: 5 neurons for 5 target variables
    # Use 'linear' activation for regression
    Dense(output_shape, activation='linear')
])

model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv_lstm2d (ConvLSTM2D)    (None, 21, 21, 32)        45056     
                                                                 
 batch_normalization (Batch  (None, 21, 21, 32)        128       
 Normalization)                                                  
                                                                 
 flatten (Flatten)           (None, 14112)             0         
                                                                 
 dense (Dense)               (None, 64)                903232    
                                                                 
 dense_1 (Dense)             (None, 6)                 390       
                                                                 
Total params: 948806 (3.62 MB)
Trainable params: 948742 (3.62 MB)
Non-trainable params: 64 (256.00 Byte)
_________________

In [5]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])
history = model.fit(
    X_train, Y_train,
    epochs=20,
    batch_size=32,
    validation_split=0.2,
    verbose=1
)

# Make a Prediction (next 6 hours)
last_sequence = np.expand_dims(X_scaled[-LOOKBACK:], axis=0)
pred_scaled = model.predict(last_sequence)
pred_real = Y_scaler.inverse_transform(pred_scaled)

print("\n--- Offshore Forecast (Input for SWAN) ---")
print(f"Swell Height:   {pred_real[0, 0]:.2f} m")
print(f"Swell Period:   {pred_real[0, 1]:.2f} s")
print(f"Swell Direction:{pred_real[0, 2]:.2f}°")
print(f"U-Wind:         {pred_real[0, 3]:.2f} m/s")
print(f"V-Wind:         {pred_real[0, 4]:.2f} m/s")

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20

--- Offshore Forecast (Input for SWAN) ---
Swell Height:   1.12 m
Swell Period:   8.28 s
Swell Direction:0.34°
U-Wind:         -0.73 m/s
V-Wind:         0.43 m/s


In [6]:
# --- Evaluate model performance ---
# Predict on validation data
val_size = int(0.2 * len(X_train))
Y_val_true = Y_train[-val_size:]
Y_val_pred = model.predict(X_train[-val_size:])

# Inverse transform back to original scale
Y_val_true_real = Y_scaler.inverse_transform(Y_val_true)
Y_val_pred_real = Y_scaler.inverse_transform(Y_val_pred)

# Compute metrics
mae = mean_absolute_error(Y_val_true_real, Y_val_pred_real)
rmse = np.sqrt(mean_squared_error(Y_val_true_real, Y_val_pred_real))
r2 = r2_score(Y_val_true_real, Y_val_pred_real)

print("\n--- Validation Metrics ---")
print(f"MAE :  {mae:.4f}")
print(f"RMSE:  {rmse:.4f}")
print(f"R²   :  {r2:.4f}")


--- Validation Metrics ---
MAE :  1.1750
RMSE:  1.9228
R²   :  -0.2918


In [7]:
for i, var in enumerate(["shts", "mpts", "mdts", "u10", "v10"]):
    mae_i = mean_absolute_error(Y_val_true_real[:, i], Y_val_pred_real[:, i])
    rmse_i = np.sqrt(mean_squared_error(Y_val_true_real[:, i], Y_val_pred_real[:, i]))
    r2_i = r2_score(Y_val_true_real[:, i], Y_val_pred_real[:, i])
    print(f"{var:>5s} | MAE={mae_i:.3f}, RMSE={rmse_i:.3f}, R²={r2_i:.3f}")

 shts | MAE=0.159, RMSE=0.203, R²=-0.150
 mpts | MAE=0.825, RMSE=0.998, R²=-0.291
 mdts | MAE=0.450, RMSE=0.498, R²=-0.319
  u10 | MAE=0.448, RMSE=0.581, R²=-0.285
  v10 | MAE=1.846, RMSE=2.253, R²=-0.124


In [None]:
# Compile the model
model.compile(optimizer='adam', loss='mean_squared_error')

# Train the model
model.fit(X_train, Y_train, epochs=20, batch_size=32, validation_split=0.2)

# --- Make a Prediction ---
# Get the last 48 hours of data to predict the next 6 hours
last_sequence = X_scaled[-LOOKBACK:]
last_sequence = np.expand_dims(last_sequence, axis=0) # Add batch dimension

predicted_scaled = model.predict(last_sequence)

In [None]:
# Use the Y_scaler we created in Step 2
predicted_real = Y_scaler.inverse_transform(predicted_scaled)

print("--- Offshore Forecast (Input for SWAN) ---")
print(f"Swell Height: {predicted_real[0, 0]:.2f} m")
print(f"Swell Period: {predicted_real[0, 1]:.2f} s")
print(f"Swell Direction: {predicted_real[0, 2]:.2f} degrees")
print(f"U-Wind: {predicted_real[0, 3]:.2f} m/s")
print(f"V-Wind: {predicted_real[0, 4]:.2f} m/s")