In [1]:
%matplotlib inline

In [2]:
from thetis import *
from firedrake_adjoint import *

In [3]:
import adolc
import numpy as np

In [4]:
from adapt_utils.case_studies.tohoku.options.okada_options import TohokuOkadaBasisOptions
from adapt_utils.misc import taylor_test
from adapt_utils.unsteady.solver import AdaptiveProblem
from adapt_utils.unsteady.solver_adjoint import AdaptiveDiscreteAdjointProblem

# Taylor test for the gradient of timeseries misfit w.r.t. Okada parameters

See the previous notebook for a description of the derivation.
Here we wrap code in functions and perform a Taylor test.

### Initialisation

Set parameters and create a `TohokuOkadaBasisOptions` object to hold them

In [5]:
kwargs = {
    'level': 0,
    'synthetic': True,
    'plot_pvd': False,
    'save_timeseries': True,
    'family': 'dg-cg',
    'stabilisation': None,
    'okada_grid_resolution': 51,
    'debug': False,
}
nonlinear = False
print_progress = False

In [6]:
active_controls = ('slip', 'rake', 'strike', 'dip')

In [7]:
op_data = TohokuOkadaBasisOptions(**kwargs)
op_data.active_controls = active_controls

### Get timeseries data

We use a 'synthetic' approach whereby 'the truth' is given by timeseries extracted from a model run with some Okada parameters.

In [8]:
with stop_annotating():
    swp = AdaptiveProblem(op_data, nonlinear=nonlinear, print_progress=print_progress)
    swp.solve_forward()  # NOTE: pyadolc annotation is also off
    for gauge in op_data.gauges:
        op_data.gauges[gauge]["data"] = op_data.gauges[gauge]["timeseries"]



Perturb the control parameters with some Normal random noise so that the source is different from that used to generate the gauge timeseries data.

In [9]:
kwargs['control_parameters'] = op_data.control_parameters
mu = 0
sigma = 5
for control in op_data.active_controls:
    size = np.shape(op_data.control_parameters[control])
    kwargs['control_parameters'][control] += np.random.normal(loc=mu, scale=sigma, size=size)

In [10]:
op_pert = TohokuOkadaBasisOptions(**kwargs)
op_pert.active_controls = active_controls
for gauge in op_pert.gauges:
    op_pert.gauges[gauge]["data"] = op_data.gauges[gauge]["data"]

### Annotate the source model

When setting the free surface initial condition using the Okada model, we tell `pyadolc` to annotate its tape.

In [11]:
tape_tag = 0
swp = AdaptiveDiscreteAdjointProblem(op_pert, nonlinear=nonlinear, print_progress=print_progress)
swp.set_initial_condition(annotate_source=True, tag=tape_tag)



### Annotate the tsunami model

Now `pyadjoint` annotates its tape.

In [12]:
init_pert = swp.fwd_solutions[0].copy(deepcopy=True)
source_control = Control(swp.fwd_solutions[0])
swp.setup_solver_forward(0)
swp.solve_forward_step(0)
J_pert = op_pert.J
print("QoI: {:.4e}".format(J_pert))

QoI: 1.4827e+05


In [13]:
m_init = op_pert.get_input_vector()
controls_pert = op_pert.control_parameters.copy()

### Reduced functional

Create a `pyadjoint` `ReducedFunctional` and a callable version

In [14]:
Jhat = ReducedFunctional(swp.op.J, source_control)
rf_np = pyadjoint.reduced_functional_numpy.ReducedFunctionalNumPy(Jhat)

In [15]:
stop_annotating();

In [16]:
def reduced_functional(m):
    """
    Given a vector of control parameters `m`:
      1. run the Okada model to generate the corresponding dislocation field;
      2. set an initial ocean free surface displacement from the dislocation field;
      3. run the tsunami propagation model by traversing the pyadjoint tape.
      
    :return: the corresponding square timeseries error misfit functional value.
    """
    op = swp.op
    num_subfaults = len(op.subfaults)
    num_active_controls = len(op.active_controls)
    
    # Extract active control parameter values from the input vector
    m_arr = m.reshape(num_subfaults, num_active_controls)
    for i, subfault in enumerate(op.subfaults):
        for j, control in enumerate(op.active_controls):
            op.control_parameters[control][i] = m_arr[i, j]
    
    # Set initial condition based on the control parameters 
    swp.set_fields()
    try:
        swp.set_initial_condition(annotate_source=False)
    except TypeError:
        op_pert.get_subfaults(reset=True)  # FIXME
        swp.set_initial_condition(annotate_source=False)
        
    # Update the initial condition data stored in the ReducedFunctional object
    init = swp.fwd_solutions[0].copy(deepcopy=True)
    Jhat.controls[0].update(init)
    
    # Check the initial conditions match (debugging)
    if np.allclose(m, m_init):
        msg = "Initial conditions do not match: {:.4e} vs. {:.4e}"
        assert np.isclose(errornorm(init, init_pert), 0.0), msg.format(norm(init), norm(init_pert))
    
    # Extract an array from the initial condition data and pass it to the NumPy reduced functional
    sol = rf_np.obj_to_array(init)
    return rf_np.__call__(sol)

In [17]:
J = reduced_functional(m_init)
print("QoI: {:.4e}".format(J))

QoI: 1.4827e+05


In [18]:
for control in active_controls:
    assert np.allclose(swp.op.control_parameters[control], controls_pert[control])

In [19]:
assert np.isclose(J_pert, J)

### Gradient

In [20]:
def gradient(m):
    """
    Given a vector of control parameters `m`, compute the gradient of the reduced functional.
    This is done as follows:
      1. Propagate unity through the *reverse* mode of AD on pyadjoint's tape to endow
         its SolveBlocks with adjoint solutions. Extract the one corresponding to time t = 0.
      2. Propagate `m` through the *forward* mode of AD on pyadolc's tape using an appropriate
         seed matrix. This computes the derivative of the Okada model.
      3. Assemble the gradient by integrating the L2 inner product of adjoint solution at time
         t = 0 with each component of the derivative of the Okada model.
    """
    op = swp.op
    num_subfaults = len(op.subfaults)
    num_active_controls = len(op.active_controls)
    N = op.N
    
    # Differentiate tsunami propagation
    swp.print("Differentiating the tsunami propagation model...")
    Jhat.derivative()
    swp.get_solve_blocks()
    swp.extract_adjoint_solution(0)
    u_star, eta_star = swp.adj_solutions[0].split()
    swp.print("Done!")
    
    # Differentiate Okada
    swp.print("Differentiating the Okada model...")
    if not hasattr(op, 'seed_matrices'):
        op.get_seed_matrices()
    F, dFdm = adolc.fov_forward(tape_tag, m, op.seed_matrices)
    F = F.reshape(num_subfaults, N, N)
    F = sum(F[i, :, :] for i in range(num_subfaults))
    dFdm = dFdm.reshape(num_subfaults, N, N, num_active_controls)
    swp.print("Done!")
    
    # Assemble gradient
    swp.print("Assembling gradient...")
    dJdm = np.zeros((num_subfaults, num_active_controls))
    for i, subfault in enumerate(op.subfaults):
        for j, control in enumerate(op.active_controls):
            deta0dm = op.interpolate_okada_array(dFdm[i, :, :, j])
            dJdm[i, j] = assemble(eta_star*deta0dm*dx)
    swp.print("Done!")
            
    return dJdm.flatten()

### Taylor test

In [23]:
# np.random.seed(23)

In [24]:
taylor_test(reduced_functional, gradient, m_init, verbose=True)

h = 1.0000e+00
Taylor remainder = 5.7483e+02
h = 5.0000e-01
Taylor remainder = 3.0032e+02


ConvergenceError: Taylor remainders do not decrease quadratically (ratio 1.9140e+00 < 3.9500e+00)

# [FIXME]

In [21]:
taylor_test(reduced_functional, gradient, m_init, verbose=True, ratio_tol=1.8)  # Check for linear convergence

h = 1.0000e+00
Taylor remainder = 1.0399e+02
h = 5.0000e-01
Taylor remainder = 4.8174e+01
h = 2.5000e-01
Taylor remainder = 2.3397e+01
