In [7]:
import deepxde as dde
import os
os.environ["DDEBACKEND"] = "pytorch"
import numpy as np
import xarray as xr

import cartopy.crs as ccrs
import cartopy.feature as cfeature
import matplotlib.pyplot as plt

import sys
sys.path.append("../")

from src.gap_data_utils import *

%matplotlib inline

ModuleNotFoundError: No module named 'src.gap_data_utils'

In [94]:
# Coordinates extraction for training
def get_random_coords(gappy_data, train_size, n_samples):
    coord_array = np.empty((train_size, n_samples), dtype=float)
    for t_idx in range(train_size):
        chl_data = gappy_data["CHL_cmes-level3"].isel(time=t_idx).values.flatten()
        valid_coords = np.argwhere(~np.isnan(chl_data))
        if len(valid_coords) > 0:
            selected_coords = valid_coords[
                np.random.choice(len(valid_coords), n_samples, replace=False)
            ]
            coord_array[t_idx] = selected_coords.flatten()
    return coord_array

(292, 100)


In [95]:
train_ims_flat = train_ims.reshape(292, 176 * 240, 6)
test_ims_flat = test_ims.reshape(73, 176 * 240, 6)
train_ims_flat.shape, test_ims_flat.shape, coordinates_array_train.shape

((292, 42240, 6), (73, 42240, 6), (292, 100))

In [113]:
my_inds.shape

(292, 100)

In [111]:
my_inds = coordinates_array_train.astype(int)
result = train_ims_flat[np.arange(292)[:, None], my_inds, :]

In [112]:
result.shape

(292, 100, 6)

In [86]:
def extract_values_at_coordinates(X, coordinates_array_train):
    dataset_size, lat_extent, lon_extent, variables = X.shape
    num_coordinates = coordinates_array_train.shape[1]

    # Initialize an array to store the extracted values
    extracted_values = np.zeros((dataset_size, num_coordinates, variables))

    # Loop through each day in the dataset
    for i in range(dataset_size):
        coords = coordinates_array_train[i]
        for coord in range(num_coordinates):
            lat_coord = coords[coord, 0]
            lon_coord = coords[coord, 1]
            lat_idx = np.where(global_lat == lat_coord)
            lon_idx = np.where(global_lon == lon_coord)
            extracted_values[i, coord] = X[i, lat_idx, lon_idx]

    return extracted_values


X_train_branch = extract_values_at_coordinates(train_ims, coordinates_array_train)
print(X_train_branch.shape)
print(X_train_branch[0, :, 1])

IndexError: too many indices for array: array is 1-dimensional, but 2 were indexed

In [56]:
def extract_values_at_coordinates2(X, coordinates_array_train):
    dataset_size, lat_extent, lon_extent = X.shape
    num_coordinates = coordinates_array_train.shape[1]

    # Initialize an array to store the extracted values
    extracted_values = np.zeros((dataset_size, num_coordinates))

    # Loop through each day in the dataset
    for i in range(dataset_size):
        coords = coordinates_array_train[i]
        lat_coords = coords[:, 0]
        lon_coords = coords[:, 1]

        # Convert lat/lon coordinates to indices
        lat_indices = np.clip(np.round(lat_coords).astype(int), 0, lat_extent - 1)
        lon_indices = np.clip(np.round(lon_coords).astype(int), 0, lon_extent - 1)

        # Extract values from X_train_branch
        for j in range(num_coordinates):
            lat_idx = lat_indices[j]
            lon_idx = lon_indices[j]
            extracted_values[i, j] = X[i, lat_idx, lon_idx]

    return extracted_values

In [53]:
# Coordinates extraction for testing
coordinates_array_test = np.empty((test_size, 100, 2), dtype=float)
for t_idx in range(test_size):
    chl_data = gappy_data["CHL_cmes-level3"].isel(time=t_idx + train_size).values
    valid_coords = np.argwhere(~np.isnan(chl_data))
    if len(valid_coords) > 0:
        selected_coords = valid_coords[
            np.random.choice(len(valid_coords), 100, replace=False)
        ]
        latitudes = lat[selected_coords[:, 0]]
        longitudes = lon[selected_coords[:, 1]]
        coordinates_array_test[t_idx] = np.column_stack((latitudes, longitudes))
print(coordinates_array_test.shape)

(73, 100, 2)


In [54]:
X_test_branch = extract_values_at_coordinates(X_test_branch, coordinates_array_test)
print(X_test_branch.shape)

(73, 100, 6)


In [57]:
# Prepare y_train and y_test
y_train = zarr_ds.sel(time=time_slice).values[:train_size].astype(np.float32)
y_train = extract_values_at_coordinates2(y_train, coordinates_array_train)
y_test = zarr_ds.sel(time=time_slice).values[train_size:].astype(np.float32)
y_test = extract_values_at_coordinates2(y_test, coordinates_array_test)
print(y_train.shape)
print(y_test.shape)

(292, 100)
(73, 100)


In [58]:
# Flatten trunk inputs
X_train_trunk = coordinates_array_train
X_test_trunk = coordinates_array_test
print(X_train_trunk.shape)
print(X_test_trunk.shape)

(292, 100, 2)
(73, 100, 2)


In [68]:
X_train_trunk.shape, y_train.shape

((292, 100, 2), (292, 100))

In [67]:
# DeepONet setup
m = len(variables)  # Number of variables
dim_x = 2  # Latitude and Longitude

net = dde.nn.DeepONet(
    [m, 40, 40],  # Branch net architecture
    [dim_x, 40, 40],  # Trunk net architecture
    "relu",
    "Glorot normal",
)

data = dde.data.Triple(
    X_train=(X_train_branch.astype(np.float32), X_train_trunk.astype(np.float32)),
    y_train=y_train.astype(np.float32),
    X_test=(X_test_branch.astype(np.float32), X_test_trunk.astype(np.float32)),
    y_test=y_test.astype(np.float32),
)

model = dde.Model(data, net)
model.compile("adam", lr=0.001, metrics=["mean l2 relative error"])
losshistory, train_state = model.train(iterations=10000)

Compiling model...
'compile' took 0.000194 s

Training model...



RuntimeError: einsum(): the number of subscripts in the equation (2) does not match the number of dimensions (3) for operand 0 and no ellipsis was given