In [1]:
import os

# TensorFlow's GPU allocator only recognises the exact lowercase strings
# "true" / "false" for TF_FORCE_GPU_ALLOW_GROWTH; any other spelling (such as
# "False") is reported as unparseable and ignored. The variable is also set
# before TensorFlow is imported so it is in place whenever the GPU is first
# initialised.
# "false" keeps the default behaviour of reserving GPU memory up front; switch
# to "true" to let TensorFlow grow its allocation on demand instead.
os.environ["TF_FORCE_GPU_ALLOW_GROWTH"] = "false"

import tensorflow.compat.v1 as tf

# Run TensorFlow in graph (non-eager) mode for the tensorflow.compat.v1 backend.
tf.disable_eager_execution()

import deepxde as dde
import matplotlib.pyplot as plt
import numpy as np

from ADR_solver import solve_ADR

"""Solve 1D Advection-Diffusion-Reaction
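u_t = (k(x) u_x)_x - v(x) u_x + g(u) + f(x, t)
with zero boundary condition.

In this notebook the reference solver is called with k(x) = 0.01, v(x) = 0,
g(u) = 0.01 u^2, a zero initial condition, and a source term f(x, t) that
depends only on x and is drawn from the `GRF` function space. A DeepONet is
trained on the PDE residual and compared against that reference solution.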
"""

# PDE
def pde(x, y, v):
    """Residual of the diffusion-reaction equation u_t = D u_xx + k u^2 + v.

    Args:
        x: Network input; column 0 is differentiated as space and column 1 as
            time.
        y: Network output u evaluated at ``x``.
        v: Source-term values supplied by the operator dataset.

    Returns:
        ``u_t - D u_xx - k u^2 - v``, which is zero for an exact solution.
    """
    D = 0.01  # diffusion coefficient
    k = 0.01  # reaction coefficient
    dy_t = dde.grad.jacobian(y, x, j=1)
    dy_xx = dde.grad.hessian(y, x, j=0)
    # The reaction term is subtracted so the residual describes the same
    # problem as the reference solver call in this file, which passes
    # g(u) = +0.01 u^2 for the right-hand side of u_t = ... + g(u) + f.
    # Adding k * y**2 here would train on the opposite reaction sign.
    return dy_t - D * dy_xx - k * y**2 - v


# Fix DeepXDE's global random seed so that collocation sampling, the randomly
# drawn input functions and the network initialisation are repeatable.
SEED = 0
dde.config.set_random_seed(SEED)

# Space-time domain: x in [0, 1], t in [0, 1].
geom = dde.geometry.Interval(0, 1)
timedomain = dde.geometry.TimeDomain(0, 1)
geomtime = dde.geometry.GeometryXTime(geom, timedomain)

# Zero Dirichlet boundary condition and zero initial condition.
bc = dde.icbc.DirichletBC(geomtime, lambda _: 0, lambda _, on_boundary: on_boundary)
ic = dde.icbc.IC(geomtime, lambda _: 0, lambda _, on_initial: on_initial)

# Stored under its own name so the residual function `pde` is not overwritten
# by the dataset object built from it.
pde_data = dde.data.TimePDE(
    geomtime,
    pde,
    [bc, ic],
    num_domain=200,
    num_boundary=40,
    num_initial=20,
    num_test=500,
)

# Function space
func_space = dde.data.GRF(length_scale=0.2)

# Data
# Sensor locations at which every input function is sampled for the branch
# net; the count (50) has to match the branch net's input width.
eval_pts = np.linspace(0, 1, num=50)[:, None]

# NOTE(review): the recorded run of this notebook aborted with a GPU
# out-of-memory error whose peak buffers have 320000 rows. The per-step batch
# presumably scales with the number of training functions, so lower these two
# values if GPU memory is limited - TODO confirm on the target GPU.
NUM_TRAIN_FUNCTIONS = 1000
NUM_TEST_FUNCTIONS = 1000
data = dde.data.PDEOperator(
    pde_data,
    func_space,
    eval_pts,
    NUM_TRAIN_FUNCTIONS,
    function_variables=[0],
    num_test=NUM_TEST_FUNCTIONS,
)

# Network: the branch net takes the 50 sensor values of the input function,
# the trunk net takes the 2 coordinates (x, t); both use three hidden/output
# layers of width 64.
branch_layer_sizes = [50, 64, 64, 64]
trunk_layer_sizes = [2, 64, 64, 64]
net = dde.nn.DeepONet(
    branch_layer_sizes,
    trunk_layer_sizes,
    "tanh",
    "Glorot normal",
)

# Optimise with Adam, then plot the recorded loss curves.
model = dde.Model(data, net)
model.compile("adam", lr=0.0005)
losshistory, train_state = model.train(iterations=50000)
dde.utils.plot_loss_history(losshistory)

# Reference solution for one freshly drawn source term.
func_feats = func_space.random(1)

# The forcing lambda below ignores its arguments and simply tiles `v` along
# time, so `v` must hold one value per spatial grid point of the solver.
# Sampling it on 50 points while requesting a 100-point grid gives a forcing
# array of the wrong shape; sample it on the solver grid instead.
num_x, num_t = 100, 100
xs = np.linspace(0, 1, num=num_x)[:, None]
v = func_space.eval_batch(func_feats, xs)[0]

# Argument roles are presumed from the equation in the header string
# (domain bounds, k, v, g, dg/du, f, u0, grid sizes) - confirm against
# ADR_solver.solve_ADR.
x, t, u_true = solve_ADR(
    0,
    1,
    0,
    1,
    lambda x: 0.01 * np.ones_like(x),
    lambda x: np.zeros_like(x),
    lambda u: 0.01 * u**2,
    lambda u: 0.02 * u,
    lambda x, t: np.tile(v[:, None], (1, len(t))),
    lambda x: np.zeros_like(x),
    num_x,
    num_t,
)
# Transposed so its layout matches the prediction built from np.meshgrid(x, t).
u_true = u_true.T
plt.figure()
plt.imshow(u_true)
plt.title("Reference solution")
plt.colorbar()

# Branch input: the same source term, sampled at the sensor locations used to
# build the training data, so the branch input always matches what the
# network was trained on.
v_branch = func_space.eval_batch(func_feats, eval_pts)[0]

# Trunk input: every (x, t) pair of the reference grid, one per row.
xv, tv = np.meshgrid(x, t)
x_trunk = np.vstack((np.ravel(xv), np.ravel(tv))).T

# Repeat the branch input once per query point; sizes are derived from the
# grid itself rather than hard-coded, so changing the solver resolution does
# not silently break the shapes here.
u_pred = model.predict((np.tile(v_branch, (len(x_trunk), 1)), x_trunk))
u_pred = u_pred.reshape(xv.shape)
print(dde.metrics.l2_relative_error(u_true, u_pred))
plt.figure()
plt.imshow(u_pred)
plt.title("DeepONet prediction")
plt.colorbar()
plt.show()

Using backend: tensorflow.compat.v1
Other supported backends: tensorflow, pytorch, jax, paddle.
paddle supports more examples now and is recommended.


Instructions for updating:
non-resource variables are not supported in the long term


2023-11-16 02:59:32.524040: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:961] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-11-16 02:59:32.558154: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:961] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-11-16 02:59:32.558435: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:961] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.


Enable just-in-time compilation with XLA.


Compiling model...
Building DeepONet...
'build' took 0.079083 s



  return tf.layers.dense(
2023-11-16 02:59:36.335031: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-11-16 02:59:36.336813: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:961] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-11-16 02:59:36.337217: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:961] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-11-16 02:59:36.337672: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:961] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel

'compile' took 1.467830 s

Initializing variables...
Training model...



2023-11-16 02:59:37.731293: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:354] MLIR V1 optimization pass is not enabled
2023-11-16 02:59:37.865821: I tensorflow/compiler/xla/service/service.cc:170] XLA service 0x7f5fbc015a60 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2023-11-16 02:59:37.865868: I tensorflow/compiler/xla/service/service.cc:178]   StreamExecutor device (0): NVIDIA GeForce RTX 3060 Laptop GPU, Compute Capability 8.6
2023-11-16 02:59:37.900440: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:263] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2023-11-16 02:59:51.561628: I tensorflow/compiler/jit/xla_compilation_cache.cc:478] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


Step      Train loss                        Test loss                         Test metric
0         [1.66e+00, 4.26e-01, 1.10e-02]    [1.46e+00, 4.26e-01, 1.10e-02]    []  


2023-11-16 03:00:13.758238: W tensorflow/core/common_runtime/bfc_allocator.cc:360] Garbage collection: deallocate free memory regions (i.e., allocations) so that we can re-allocate a larger region to avoid OOM due to memory fragmentation. If you see this message frequently, you are running near the threshold of the available device memory and re-allocation may incur great performance overhead. You may try smaller batch sizes to observe the performance impact. Set TF_ENABLE_GPU_GARBAGE_COLLECTION=false if you'd like to disable this feature.
2023-11-16 03:00:25.888342: W tensorflow/core/common_runtime/bfc_allocator.cc:479] Allocator (GPU_0_bfc) ran out of memory trying to allocate 4.27GiB (rounded to 4590147328)requested by op cluster_2_1/xla_run
If the cause is memory fragmentation maybe the environment variable 'TF_GPU_ALLOCATOR=cuda_malloc_async' will improve the situation. 
Current allocation summary follows.
Current allocation summary follows.
2023-11-16 03:00:25.888405: I tensorflo

ResourceExhaustedError: Graph execution error:

Out of memory while trying to allocate 4590147232 bytes.
BufferAssignment OOM Debugging.
BufferAssignment stats:
             parameter allocation:   64.98MiB
              constant allocation:    2.44MiB
        maybe_live_out allocation:   285.1KiB
     preallocated temp allocation:    4.27GiB
  preallocated temp fragmentation:         0B (0.00%)
                 total allocation:    4.34GiB
              total fragmentation:    2.53MiB (0.06%)
Peak buffers:
	Buffer 1:
		Size: 156.25MiB
		Operator: op_type="MatMul" op_name="gradients_2/gradients_1/dense_5/MatMul_grad/MatMul_grad/MatMul"
		XLA Label: custom-call
		Shape: f32[320000,128]
		==========================

	Buffer 2:
		Size: 156.25MiB
		Operator: op_type="MatMul" op_name="gradients_2/gradients/dense_5/MatMul_grad/MatMul_grad/MatMul"
		XLA Label: custom-call
		Shape: f32[320000,128]
		==========================

	Buffer 3:
		Size: 156.25MiB
		Operator: op_type="MatMul" op_name="gradients_2/gradients_1/dense_4/MatMul_grad/MatMul_grad/MatMul"
		XLA Label: custom-call
		Shape: f32[320000,128]
		==========================

	Buffer 4:
		Size: 156.25MiB
		Operator: op_type="MatMul" op_name="gradients_2/gradients_1/dense_3/MatMul_grad/MatMul_grad/MatMul"
		XLA Label: custom-call
		Shape: f32[320000,128]
		==========================

	Buffer 5:
		Size: 156.25MiB
		Operator: op_type="AddN" op_name="gradients_1/AddN_1"
		XLA Label: custom-call
		Shape: f32[320000,128]
		==========================

	Buffer 6:
		Size: 156.25MiB
		Operator: op_type="AddN" op_name="gradients_1/AddN"
		XLA Label: custom-call
		Shape: f32[320000,128]
		==========================

	Buffer 7:
		Size: 156.25MiB
		Operator: op_type="MatMul" op_name="gradients_1/gradients/dense_5/MatMul_grad/MatMul_grad/MatMul"
		XLA Label: custom-call
		Shape: f32[320000,128]
		==========================

	Buffer 8:
		Size: 156.25MiB
		Operator: op_type="MatMul" op_name="gradients_1/gradients/dense_4/MatMul_grad/MatMul_grad/MatMul"
		XLA Label: custom-call
		Shape: f32[320000,128]
		==========================

	Buffer 9:
		Size: 156.25MiB
		Operator: op_type="MatMul" op_name="gradients_1/gradients/dense_3/MatMul_grad/MatMul_grad/MatMul"
		XLA Label: custom-call
		Shape: f32[320000,128]
		==========================

	Buffer 10:
		Size: 156.25MiB
		Operator: op_type="MatMul" op_name="gradients/dense_4/MatMul_grad/MatMul"
		XLA Label: custom-call
		Shape: f32[320000,128]
		==========================

	Buffer 11:
		Size: 156.25MiB
		Operator: op_type="MatMul" op_name="gradients/dense_5/MatMul_grad/MatMul"
		XLA Label: custom-call
		Shape: f32[320000,128]
		==========================

	Buffer 12:
		Size: 156.25MiB
		Operator: op_type="BiasAdd" op_name="dense_2/BiasAdd"
		XLA Label: custom-call
		Shape: f32[320000,128]
		==========================

	Buffer 13:
		Size: 156.25MiB
		Operator: op_type="Tanh" op_name="dense_1/Tanh"
		XLA Label: tanh
		Shape: f32[320000,128]
		==========================

	Buffer 14:
		Size: 156.25MiB
		Operator: op_type="Tanh" op_name="dense/Tanh"
		XLA Label: tanh
		Shape: f32[320000,128]
		==========================

	Buffer 15:
		Size: 156.25MiB
		Operator: op_type="Tanh" op_name="dense_4/Tanh"
		XLA Label: tanh
		Shape: f32[320000,128]
		==========================


	 [[{{node cluster_2_1/xla_run}}]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info. This isn't available when running in Eager mode.