# Problem definition

We wish to minimize
$$ I(u,v) = \frac{\theta}{2} \int_{\omega} |\nabla_s u + \tfrac{1}{2} \nabla v \otimes \nabla v|^{2} \mathrm{d}x
   + \frac{1}{24} \int_{\omega} |\nabla^2 v - \mathrm{Id}|^{2} \mathrm{d}x. $$

Because we only have $C^0$ elements, we substitute $z$ for $\nabla v$ and minimize instead

$$ J(u,z) = \frac{\theta}{2} \int_{\omega} |\nabla_s u + \tfrac{1}{2} z \otimes z|^{2} \mathrm{d}x 
          + \frac{1}{24} \int_{\omega} |\nabla z - \mathrm{Id}|^{2} \mathrm{d}x 
          + \mu \int_{\omega} |\mathrm{curl}\ z|^{2} \mathrm{d}x, $$

then recover the vertical displacements (up to a constant) by minimizing

$$ F(p,q) = \tfrac{1}{2} || \nabla p - q ||^2 + \tfrac{1}{2} || q - z ||^2. $$

This we do by solving the linear problem $D F = 0$.

Minimization of the energy functional $J$ is done via gradient descent and a line search. In particular, at each timestep we compute $d_t w \in W $ such that for all $\tau \in W$:

$$ (d_t w, \tau)_{H^1_0 \times H^2_0} = -DJ(w_t)[\tau] $$

Note that it is essential to use the full scalar product (or the one corresponding to the seminorms? check this), otherwise we run into issues at the boundaries (to see this, start with zero displacements and integrate by parts). Also, the proper Riesz representative is only obtained with the correct scalar product.

A decoupled gradient descent in each component does not work, probably because the functional is not separately convex (see Bartels' book, p. 110, remark (iv)).

In-plane displacements and gradients of out-of-plane displacements form a mixed function space $U \times Z$. We also have another scalar space $V$ where the potential of the out-of-plane gradients lives. The model is defined and solved in `run_model()` below. Experiments can easily be run in parallel with `joblib`.

In [None]:
from dolfin import *
import os
import mshr
import numpy as np
import matplotlib.pyplot as pl
from time import time
from tqdm import tqdm_notebook as tqdm
from common import make_initial_data_penalty, circular_symmetry, save_results, create_mesh
from plots import *

In [None]:
def run_model(init:str, mesh_file:str, theta:float, mu:float = 0.0,
              e_stop_mult:float=1e-5, max_steps:int=400, fname_prefix:str="vk-descent",
              save_funs:bool=True, n=0):
    """Minimise the penalised energy J(u, z) by gradient descent with a line search.

    Each iteration solves a linear problem for the descent direction
    (the gradient of J represented in the full H^1 scalar product of the
    mixed space), then halves the step size `alpha` until a
    sufficient-decrease test on the energy passes. After every accepted step
    the displacements are appended to a ``.pvd`` file under
    ``output/<fname_prefix without leading/trailing dashes>/``.

    Args:
        init: Name of the initial data; forwarded to `make_initial_data`
            and stored in the returned history.
        mesh_file: Path of the mesh file handed to `Mesh`.
        theta: Coefficient of the membrane term of the energy.
        mu: Coefficient of the curl penalty term.
        e_stop_mult: The stopping threshold is ``msh.hmin() * e_stop_mult``.
        max_steps: Maximum number of descent steps.
        fname_prefix: Prefix of the output directory and of the output files.
        save_funs: If true, the final functions are stored in the history
            under 'disp', 'u', 'v', 'dtu' and 'dtv'.
        n: Position of the progress bar (one row per parallel run).

    Returns:
        A dict with the run parameters ('init', 'mu', 'theta', 'e_stop'),
        per-step lists ('J', 'alpha', 'du', 'dv', 'curl', 'symmetry'),
        the wall-clock 'time' of the descent loop and the number of 'steps'.

    Raises:
        RuntimeError: If the line search shrinks `alpha` below
            ``0.5 ** max_line_search_steps`` without finding a decrease.
    """

    msh = Mesh(mesh_file)

    t = tqdm(total=max_steps, desc='th=% 5.0f' % theta, position=n, dynamic_ncols=True)

    def noop(*args, **kwargs):
        pass
    # Replace `noop` with `print` to get a verbose trace of the iteration.
    debug = noop

    # in plane displacements (IPD)
    UE = VectorElement("Lagrange", msh.ufl_cell(), 2, dim=2)
    # Gradients of out of plane displacements (OPD)
    VE = VectorElement("Lagrange", msh.ufl_cell(), 2, dim=2)
    W = FunctionSpace(msh, UE*VE)
    # will store out of plane displacements
    V = FunctionSpace(msh, "Lagrange", 2)

    # No Dirichlet conditions are imposed: the solve below uses an empty
    # list of boundary conditions.
    #class DirichletBoundary(SubDomain):
    #    def inside(self, x, on_boundary):
    #        return False
    #bcU = DirichletBC(W.sub(0), Constant((0.0, 0.0)), DirichletBoundary())
    #bcV = DirichletBC(W.sub(1), Constant((0.0, 0.0)), DirichletBoundary())

    # We gather in-plane and out-of-plane displacements into one
    # Function for visualization with ParaView.
    P = VectorFunctionSpace(msh, "Lagrange", 2, dim=3)
    fax = FunctionAssigner(P.sub(0), W.sub(0).sub(0))
    fay = FunctionAssigner(P.sub(1), W.sub(0).sub(1))
    faz = FunctionAssigner(P.sub(2), V)

    disp = Function(P)
    disp.rename("disp", "displacement")

    # makedirs also creates the parent "output" directory and does not fail
    # when the directory already exists; genuine errors (e.g. permissions)
    # are no longer silently swallowed.
    out_dir = "output/" + fname_prefix.strip('-')
    os.makedirs(out_dir, exist_ok=True)

    pvd_file = File(out_dir + "/" + fname_prefix + ".pvd")  # .vtu files will have the same prefix

    w = Function(W)
    w_ = Function(W)
    u, v  = w.split()
    u_, v_ = w_.split()

    # NOTE(review): only `make_initial_data_penalty` is imported explicitly
    # in this notebook; confirm that `make_initial_data` is in scope.
    w_init = make_initial_data(init)
    w.interpolate(w_init)
    w_.interpolate(w_init)

    def eps(u):
        """Symmetrised gradient of `u`."""
        return (grad(u) + grad(u).T)/2.0

    e_stop = msh.hmin()*e_stop_mult
    max_line_search_steps = 20
    step = 0
    omega = 0.25   # Gradient descent fudge factor in (0, 1/2)
    _hist = {'init': init, 'mu': mu, 'theta': theta, 'e_stop' : e_stop,
             'J':[], 'alpha':[], 'du':[], 'dv':[], 'curl':[],
             'symmetry':[]}

    Id = Identity(2)

    def energy(u, v, mu=mu):
        """Assemble the penalised energy J for the pair (u, v)."""
        J = (theta/2)*inner(eps(u)+outer(v, v)/2, eps(u)+outer(v, v)/2)*dx(msh) \
            + (1./24)*inner(grad(v) - Id, grad(v) - Id)*dx(msh) \
            + mu*inner(curl(v), curl(v))*dx(msh)
        return assemble(J)

    def write_displacements(u, v, step):
        """Recover the out-of-plane potential and append one frame to the output file."""
        # NOTE(review): `compute_potential` is not among the explicit imports
        # of this notebook; confirm where it comes from.
        opd = compute_potential(v, V)
        fax.assign(disp.sub(0), u.sub(0))
        fay.assign(disp.sub(1), u.sub(1))
        faz.assign(disp.sub(2), opd)
        pvd_file << (disp, float(step))

    phi, psi = TestFunctions(W)
    # CAREFUL!! Picking the right scalar product here is essential
    # Recall the issues with boundary values: integrate partially and only boundary terms survive...
    #dtu, dtv = TrialFunctions(W)
    #L = inner(dtu, phi)*dx + inner(dtv, psi)*dx \
    #    + inner(grad(dtu), grad(phi))*dx + inner(grad(dtv), grad(psi))*dx
    # The previous lines are equivalent to:
    dtw = TrialFunction(W)
    z = TestFunction(W)
    L = inner(dtw, z)*dx+inner(grad(dtw), grad(z))*dx

    dw = Function(W)
    du, dv = dw.split()

    # Output initial condition
    write_displacements(u, v, step)

    cur_energy = energy(u, v)
    alpha = ndu = ndv = 1.0

    debug("Solving with theta = %.2e, mu = %.2e, eps=%.2e for at most %d steps."
          % (theta, mu, e_stop, max_steps))
    begin = time()
    while alpha*(ndu**2+ndv**2) > e_stop and step < max_steps:
        _curl = assemble(curl(v_)*dx)
        _symmetry = circular_symmetry(disp)
        _hist['curl'].append(_curl)
        _hist['symmetry'].append(_symmetry)
        debug("Step %d, energy = %.3e, curl = %.3e, symmetry = %.3f"
              % (step, cur_energy, _curl, _symmetry))

        #### Gradient
        dJ = theta * inner(eps(u_)+outer(v_, v_)/2, eps(phi))*dx(msh) \
            + theta * inner(eps(u_)+outer(v_, v_)/2, outer(v_, psi))*dx(msh) \
            + (1./12) * inner(grad(v_) - Id, grad(psi))*dx(msh) \
            + 2*mu * inner(curl(v_), curl(psi))*dx(msh)

        debug("\tSolving...", end='')
        solve(L == -dJ, dw, [])

        # dw is never rebound to a new object, so re-splitting is not
        # strictly required; it is kept for clarity.
        du, dv = dw.split()
        ndu = norm(du)
        ndv = norm(dv)

        debug(" done with |du| = %.3f, |dv| = %.3f" % (ndu, ndv))

        #### Line search
        new_energy = 0
        debug("\tSearching... ", end='')
        while True:
            w = project(w_ + alpha*dw, W)
            u, v = w.split()
            new_energy = energy(u, v)
            if new_energy <= cur_energy - omega*alpha*(ndu**2+ndv**2):
                debug(" alpha = %.2e" % alpha)
                # The history records the energy *before* the accepted step,
                # so entry 0 is the energy of the initial data.
                _hist['J'].append(cur_energy)
                _hist['alpha'].append(alpha)
                _hist['du'].append(ndu)
                _hist['dv'].append(ndv)
                cur_energy = new_energy
                alpha = min(1.0, 2.0 * alpha)  # Use a larger alpha for the next line search
                break
            if alpha < (1./2)**max_line_search_steps:
                # Release the progress bar before propagating the failure.
                t.close()
                raise RuntimeError("Line search failed after %d steps" % max_line_search_steps)
            alpha /= 2.0  # Repeat with smaller alpha

        step += 1

        #### Write displacements to file
        debug("\tSaving... ", end='')
        write_displacements(u, v, step)
        debug("Done.")

        # The accepted iterate becomes the base point of the next step.
        w_.vector()[:] = w.vector()
        u_, v_ = w_.split()
        t.update()

    _hist['time'] = time() - begin

    if step < max_steps:
        # Stopped early: shrink the bar's total to the steps actually taken.
        t.total = step
        t.update()

    _hist['steps'] = step
    if save_funs:
        _hist['disp'] = disp
        _hist['u'] = u
        _hist['v'] = v
        _hist['dtu'] = du
        _hist['dtv'] = dv
    debug("Done after %d steps" % step)

    t.close()
    return _hist

We store the outputs of different runs in a global list.

In [None]:
# Create the list of runs only once, so that re-running this cell
# does not discard results that were already collected.
if 'history' not in globals() or history is None:
    history = []

In [None]:
# Single run for one value of theta; the result is appended to `history`.
# NOTE(review): `generate_mesh` is not among the explicit imports above
# (`create_mesh` is imported from `common`) — confirm it is in scope.
mesh_file = generate_mesh('circle', 18, 18)
theta = 0.01
_hist = run_model('ani_parab', mesh_file, theta=theta, mu=0.0,
                  fname_prefix='ani-parab-%07.2f' % theta,
                  max_steps=100, save_funs=True, e_stop_mult=1e-6)

history.append(_hist)

## History

In [None]:
def running(x, N=5):
    """Return the running mean of `x` over a window of `N` samples.

    Only positions where the window fits entirely inside `x` are returned
    ('valid' convolution).
    """
    kernel = np.full(N, 1.0 / N)
    return np.convolve(x, kernel, mode='valid')

def plots1(history:dict, _slice=slice(0,-1), running_mean_window=1):
    """Plot the per-step history of a single run in a 3x2 grid.

    Panels: update norms 'du' and 'dv', log of the step size 'alpha'
    (these three smoothed with a running mean), 'curl', 'symmetry' and
    the energy 'J'.

    Args:
        history: History dict of one run (keys 'init', 'theta', 'mu',
            'e_stop', 'du', 'dv', 'alpha', 'curl', 'symmetry', 'J').
        _slice: Slice applied to every per-step list before plotting.
        running_mean_window: Window of the running mean used for the
            first three panels.
    """
    h = history
    win = running_mean_window

    def panel(index, data, title):
        """Draw one curve with its title in cell `index` of the 3x2 grid."""
        pl.subplot(3,2,index)
        pl.plot(data)
        pl.title(title)

    pl.figure(figsize=(18,12), )
    # Backslashes are escaped explicitly: "\m" and "\ " are invalid escape
    # sequences in a non-raw string literal.
    pl.suptitle("'%s', $\\theta = %.2e$, $\\mu = %.2e$, $\\epsilon = %.2e$"
                % (h['init'], h['theta'], h['mu'], h['e_stop']))
    panel(1, running(h['du'][_slice], win), '$d_{t}u$, window: %d' % win)
    panel(2, running(h['dv'][_slice], win), '$d_{t}v$, window: %d' % win)
    panel(3, running(np.log(h['alpha'][_slice]), win),
          '$log\\ \\alpha_t$, window: %d' % win)
    panel(4, h['curl'][_slice], "curl")
    panel(5, h['symmetry'][_slice], "symmetry")
    panel(6, h['J'][_slice], "Energy")

In [None]:
# Plot the history of the most recent run; slice(0,-1) leaves out the last entry.
plots1(history[-1], slice(0,-1))

# Solution and last update

In [None]:
def plots2(history:dict):
    """Plot the final iterates and the last update of a run in a 2x2 grid.

    Expects the functions stored under 'u', 'v', 'dtu' and 'dtv'; each
    panel title also reports the norm of the plotted function.
    """
    h = history
    pl.figure(figsize=(18,18))
    # (history key, symbol used in the panel title), in subplot order
    panels = (('u', 'u'), ('v', 'v'), ('dtu', 'du'), ('dtv', 'dv'))
    for position, (key, symbol) in enumerate(panels, start=1):
        pl.subplot(2,2,position)
        field = h[key]
        plot(field,
             title="$%s_{\\theta}$ at last timestep, norm = %.2e" % (symbol, norm(field)))

In [None]:
# Show the final u, v and the last updates of the most recent run.
plots2(history[-1])

# Exploring the range of $\theta$

In [None]:
from tqdm import tqdm
from joblib import Parallel, delayed

# One run per value of theta, distributed over `n_jobs` worker processes.
theta_values = np.arange(0.0, 100.0, 1.0, dtype=float)
# Careful: hyperthreading won't help (we are probably bound by memory r/w)
n_jobs = min(11, len(theta_values))

# `run_model` requires the mesh file as its second positional argument;
# every run uses the mesh file created for the single run above.
# `n` selects the row of each run's progress bar.
new_res = Parallel(n_jobs=n_jobs)(delayed(run_model)('ani_parab', mesh_file,
                                                     theta=theta, mu=0.0,
                                                     fname_prefix='ani-parab-%06.1f-' % theta,
                                                     max_steps=2000, save_funs=False,
                                                     e_stop_mult=1e-6, n=n)
                                  for n, theta in enumerate(theta_values))

In [None]:
import pickle as pk
import os

def name(r):
    """Build the key identifying a run from its parameters and step count."""
    parts = (
        "%s" % r['init'],
        "%06.1f" % r['theta'],
        "%3.1f" % r['mu'],
        "%.2e" % r['e_stop'],
        "%d" % r['steps'],
    )
    return "_".join(parts)

results_file = "results-combined.pickle"
# Start from the previously saved results, if there are any.
results = {}
if os.path.isfile(results_file):
    with open(results_file, "rb") as fd:
        results = pk.load(fd)

new_results = {name(r): r for r in new_res}

for k, r in new_results.items():
    # Save the history plots of every new run next to the other output.
    r['plots1_fname'] = "output/plots1-%s.eps" % k
    plots1(r)
    pl.savefig(r['plots1_fname'])
    pl.close()

    # Keep a new run if its key is new or if it took more steps than the
    # stored run with the same key.
    if not results.get(k) or r['steps'] > results[k]['steps']:
        results[k] = r

# The merged results go to a separate file so that the combined file read
# above is not overwritten by accident.
with open("results-combined-carefulnottooverwrite.pickle", "wb") as f:
    pk.dump(results, f)

With increasing $\theta$ we expect the symmetry of the solution to be ever more violated until it is cylindrical rather than parabolic. However there seems to be no clear discontinuity. This can be due to 

* a poor criterion for symmetry (we are just taking the quotient of the principal axes)
* solutions not being proper minima (gradient descent didn't converge to $\epsilon_{\text{stop}}$ precision)
* ...

In [None]:
def plots3(run: dict, begin: float = 0.0, end: float = np.inf):
    """Plot symmetry and energies of the runs as functions of theta.

    Only runs with ``begin <= theta < end`` are shown, ordered by theta.
    The top panel shows the inverse of the mean of the last four symmetry
    values, each point annotated with the number of steps of its run; the
    bottom panels show the first recorded energy and the mean of the last
    four recorded energies.

    Args:
        run: Mapping from run name to the history dict of that run.
        begin: Lower bound (inclusive) for theta.
        end: Upper bound (exclusive) for theta.
    """
    selected = {key: val for key, val in run.items() if begin <= val['theta'] < end}
    by_theta = OrderedDict(sorted(selected.items(), key=lambda item: item[1]['theta']))
    entries = list(by_theta.values())

    thetas = [entry['theta'] for entry in entries]
    syms = [1. / np.array(entry['symmetry'][-4:]).mean() for entry in entries]
    final_energy = [np.array(entry['J'][-4:]).mean() for entry in entries]
    initial_energy = [entry['J'][0] for entry in entries]
    steps = [entry['steps'] for entry in entries]

    pl.figure(figsize=(14, 14))
    ax = pl.subplot(2, 1, 1)
    pl.plot(thetas, syms, 'o', thetas, syms, 'k')
    for point, step in zip(zip(thetas, syms), steps):
        ax.annotate('%d' % step, xy=np.array(point), textcoords='data')
    pl.xticks(thetas, thetas)
    pl.xlabel("$\\theta$")
    pl.ylabel("Symmetry")
    pl.title("Symmetry of the solution as a function of $\\theta$")
    # Disabled panel: number of iterations per run.
    # pl.subplot(2,2,2)
    # pl.plot(thetas, steps)
    # pl.xticks(thetas, thetas)
    # pl.xlabel("$\\theta$")
    # pl.ylabel("Number of steps taken")
    # pl.title("Iterations to convergence")
    for cell, values, title in ((3, initial_energy, "Initial energy"),
                                (4, final_energy, "Final energy")):
        pl.subplot(2, 2, cell)
        pl.plot(thetas, values)
        pl.xticks(thetas, thetas)
        pl.title(title)
        pl.xlabel("$\\theta$")
        pl.ylabel("J")

In [None]:
def plots4(runs:list, _slice=slice(0,-1), running_mean_window=1):
    """Overlay the per-step histories of several runs in a 3x2 grid.

    Same panels as `plots1`, with one curve per run labelled by its theta.
    The symmetry panel also shows, as a dotted line, the first symmetry
    value of the run with the smallest theta.

    Args:
        runs: Non-empty list of history dicts (one per run).
        _slice: Slice applied to every per-step list before plotting.
        running_mean_window: Window of the running mean used for the
            first three panels.
    """
    _runs = sorted(runs, key=lambda x: x['theta'])
    win = running_mean_window

    def label(h):
        """Legend entry of a run."""
        return '$\\theta = %.2f$' % h['theta']

    pl.figure(figsize=(18,12), )
    pl.suptitle("'%s'" % _runs[0]['init'])

    # Smoothed panels: (subplot index, data selector, title).
    # "\\ " is escaped explicitly: "\ " is an invalid escape sequence in a
    # non-raw string literal.
    smoothed = (
        (1, lambda h: h['du'][_slice], '$d_{t}u$, window: %d' % win),
        (2, lambda h: h['dv'][_slice], '$d_{t}v$, window: %d' % win),
        (3, lambda h: np.log(h['alpha'][_slice]), '$log\\ \\alpha_t$, window: %d' % win),
    )
    for index, select, title in smoothed:
        pl.subplot(3,2,index)
        for h in _runs:
            pl.plot(running(select(h), win), label=label(h))
        pl.title(title)
        pl.legend()

    pl.subplot(3,2,4)
    for h in _runs:
        pl.plot(h['curl'][_slice], label=label(h))
    pl.title("curl")
    pl.legend()

    pl.subplot(3,2,5)
    xmax = 0
    for h in _runs:
        # Track the longest curve so the reference line spans all of them.
        xmax = max(xmax, len(h['symmetry'][_slice]))
        pl.plot(h['symmetry'][_slice], label=label(h))
    pl.hlines(_runs[0]['symmetry'][0], xmin=0, xmax=xmax, linestyles='dotted')
    pl.title("symmetry")
    pl.legend()

    pl.subplot(3,2,6)
    for h in _runs:
        pl.plot(h['J'][_slice], label=label(h))
    pl.title("Energy")
    pl.legend()

In [None]:
# IPython magic: select the Tk backend for matplotlib.
%matplotlib tk

In [None]:
from descent import *
import matplotlib.pyplot as pl
import pickle as pk
from collections import OrderedDict

# Load the combined results of earlier runs.
# NOTE: unpickling can execute arbitrary code; only load files you created yourself.
with open("results-combined.pickle", "rb") as f:
    res = pk.load(f)

Filter out runs that stopped prematurely and runs with high values of $\epsilon_{\text{stop}}$.

In [None]:
# Distinct stopping thresholds found in the loaded runs (displayed as the cell output).
all_eps = set(v['e_stop'] for k, v in res.items())
all_eps

In [None]:
# fres: only the runs that used the smallest stopping threshold.
# fres2: additionally drop runs that took exactly 500 or 2000 steps
# (presumably the step limits, i.e. runs that did not converge — confirm).
fres = {k:v for k, v in res.items() if v['e_stop'] == min(all_eps)}
fres2 = {k:v for k, v in fres.items() if v['steps'] not in [500, 2000] }

In [None]:
# Symmetry and energies for the runs with 8 <= theta < 10.
plots3(fres, 8, 10)

Plot data about some weird runs:

In [None]:
def get_longest(res:dict, theta:float):
    """Return the run with the given theta that took the most steps.

    Args:
        res: Mapping from run name to the history dict of that run.
        theta: Value of theta to look for (compared with ``==``).

    Returns:
        The history dict with the largest 'steps' among the runs whose
        'theta' equals `theta` (the first one on ties), or an empty dict
        if there is no such run with a positive number of steps.
    """
    # Runs without any step are ignored, so an empty dict is returned when
    # nothing useful was recorded for this theta.
    candidates = [v for v in res.values() if v['theta'] == theta and v['steps'] > 0]
    return max(candidates, key=lambda r: r['steps'], default={})

In [None]:
# History of the longest run with theta = 8.7, skipping the first 10 steps
# and smoothing with a running-mean window of 20.
plots1(get_longest(res, 8.7), slice(10, -1), 20)

In [None]:
# Compare the runs with 8.5 <= theta < 10, skipping the first 20 steps
# and smoothing with a running-mean window of 20.
plots4([v for k, v in fres.items() if 8.5 <= v['theta'] < 10.0], slice(20, -1), 20)