Commit
Working on adjoint, planning to remove temporarily pascal_lite support
qiqi committed Oct 17, 2017
1 parent cc6519e commit 2aa3b71
Showing 4 changed files with 114 additions and 5 deletions.
3 changes: 3 additions & 0 deletions fds/fds.py
@@ -198,6 +198,9 @@ def shadowing(
J: quantities of interest, a numpy array of shape
(steps, n_qoi), where n_qoi is an arbitrary
but consistent number of quantities of interest.
returns: (J, G)
J: time-averaged objective function, an array of length n_qoi.
G: derivative of the time-averaged objective function, an array of length n_qoi.
'''
u0 = pascal.symbolic_array(field=u0)

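For reference, a usage sketch of the (J, G) return value documented above; the argument values mirror the call in the new test below and are illustrative only:

J, G = shadowing(solve, u0, 28, m, 3, 1000, 5000)
# J: time-averaged objectives, an array of length n_qoi
# G: their derivatives with respect to the parameter (here 28), same length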
24 changes: 22 additions & 2 deletions fds/lsstan.py
@@ -2,7 +2,6 @@
from scipy import sparse
import scipy.sparse.linalg as splinalg

from .timeseries import windowed_mean
import pascal_lite as pascal

class LssTangent:
@@ -33,6 +32,9 @@ def checkpoint(self, V, v):
self.bs.append(b)
return V, v

def adjoint_checkpoint(self, i, V, v_adj, b_adj):
# work in progress: adjoint counterpart of checkpoint(); currently it only
# looks up the i-th R factor and does not yet do anything with it
R = self.Rs[i]

def solve(self):
R, b = np.array(self.Rs), np.array(self.bs)
assert R.ndim == 3 and b.ndim == 2
@@ -42,13 +44,31 @@ def solve(self):
eyes = np.eye(subdim, subdim) * np.ones([nseg, 1, 1])
matrix_shape = (subdim * nseg, subdim * (nseg+1))
I = sparse.bsr_matrix((eyes, np.r_[1:nseg+1], np.r_[:nseg+1]))
D = sparse.bsr_matrix((R, np.r_[:nseg], np.r_[:nseg+1]),
shape=matrix_shape)
B = (D - I).tocsr()
Schur = B * B.T #+ 1E-5 * sparse.eye(B.shape[0])
alpha = -(B.T * splinalg.spsolve(Schur, np.ravel(b)))
# alpha1 = splinalg.lsqr(B, ravel(bs), iter_lim=10000)
return alpha.reshape([nseg+1,-1])[:-1]
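As context for solve(): alpha is the minimum-norm solution of the underdetermined system B alpha = -b, computed through the Schur complement B B^T. A dense, self-contained sketch of the same formula, with illustrative names and random data:

import numpy as np
B = np.random.rand(4, 6)                           # more columns than rows
b = np.random.rand(4)
alpha = -B.T @ np.linalg.solve(B @ B.T, b)         # same formula, dense
print(np.allclose(B @ alpha, -b))                  # constraint satisfied
print(np.allclose(alpha, -np.linalg.pinv(B) @ b))  # minimum-norm solution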

def adjoint(self, alpha_adj):
# adjoint of solve(): maps sensitivities with respect to alpha back to
# sensitivities with respect to the right-hand sides stored in self.bs
R = np.array(self.Rs)
assert R.ndim == 3
assert R.shape[0] == alpha_adj.shape[0]
assert R.shape[1] == R.shape[2] == alpha_adj.shape[1]
nseg, subdim = alpha_adj.shape
alpha_adj = np.vstack([alpha_adj, np.zeros([1, subdim])]).ravel()
eyes = np.eye(subdim, subdim) * np.ones([nseg, 1, 1])
matrix_shape = (subdim * nseg, subdim * (nseg+1))
I = sparse.bsr_matrix((eyes, np.r_[1:nseg+1], np.r_[:nseg+1]))
D = sparse.bsr_matrix((R, np.r_[:nseg], np.r_[:nseg+1]),
shape=matrix_shape)
B = (D - I).tocsr()
Schur = B * B.T #+ 1E-5 * sparse.eye(B.shape[0])
b_adj = -splinalg.spsolve(Schur, B * alpha_adj)
return b_adj.reshape([nseg, subdim])

def solve_ivp(self):
a = [np.zeros(self.bs[0].shape)]
for i in range(len(self.bs)):
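The new adjoint() mirrors solve(): it applies the transposed Schur-complement solve, so the inner product of alpha_adj with solve()'s output should equal the inner product of adjoint()'s output with the right-hand sides. A self-contained sketch of this identity using random data and the same sparse formulas (variable names here are illustrative):

import numpy as np
from scipy import sparse
import scipy.sparse.linalg as splinalg

nseg, subdim = 5, 3
R = np.random.rand(nseg, subdim, subdim)
b = np.random.rand(nseg, subdim)

eyes = np.eye(subdim) * np.ones([nseg, 1, 1])
shape = (subdim * nseg, subdim * (nseg + 1))
I = sparse.bsr_matrix((eyes, np.r_[1:nseg+1], np.r_[:nseg+1]))
D = sparse.bsr_matrix((R, np.r_[:nseg], np.r_[:nseg+1]), shape=shape)
B = (D - I).tocsr()
Schur = B * B.T

# forward: bs -> alpha, as in solve()
alpha = -(B.T * splinalg.spsolve(Schur, b.ravel())).reshape([nseg + 1, -1])[:-1]

# adjoint: alpha_adj -> b_adj, as in adjoint()
alpha_adj = np.random.rand(nseg, subdim)
padded = np.vstack([alpha_adj, np.zeros([1, subdim])]).ravel()
b_adj = -splinalg.spsolve(Schur, B * padded).reshape([nseg, subdim])

print((alpha_adj * alpha).sum(), (b_adj * b).sum())  # should agree to round-off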
9 changes: 6 additions & 3 deletions fds/timeseries.py
@@ -7,9 +7,12 @@ def mean_std(x):
return x.mean(0), x_i.std(0) / sqrt(len(x_i))

def windowed_mean(a):
win = windowed_mean_weights(a.shape[0])
return dot(win, a)

def windowed_mean_weights(n):
win = sin(linspace(0, pi, n+2)[1:-1])**2
return win / win.sum()
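A small illustration of the extracted helper (not part of the commit): the weights are a normalized sin^2 window, so they sum to one and windowed_mean reduces to a weighted dot product.

from numpy import sin, linspace, pi, dot, ones

n = 5
win = sin(linspace(0, pi, n + 2)[1:-1])**2
win = win / win.sum()
print(win)                      # approximately [1/12, 1/4, 1/3, 1/4, 1/12]
print(win.sum())                # 1.0, up to round-off
print(dot(win, ones([n, 2])))   # windowed mean of a constant series: [1. 1.]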

def exp_cum_mean(x):
x, x_mean = array(x), []
83 changes: 83 additions & 0 deletions tests/test_lorenz_adjoint.py
@@ -0,0 +1,83 @@
from __future__ import division

import os
import sys
import shutil
import tempfile
from subprocess import *

from numpy import *
import numpy as np

from fds.timeseries import windowed_mean_weights, windowed_mean
from fds.segment import run_segment, trapez_mean

my_path = os.path.dirname(os.path.abspath(__file__))
sys.path.append(os.path.join(my_path, '..'))

from fds import *

solver_path = os.path.join(my_path, 'solvers', 'lorenz')
solver = os.path.join(solver_path, 'solver')
adj_solver = os.path.join(solver_path, 'adjoint')
u0 = loadtxt(os.path.join(solver_path, 'u0'))

def solve(u, s, nsteps):
tmp_path = tempfile.mkdtemp()
with open(os.path.join(tmp_path, 'input.bin'), 'wb') as f:
f.write(asarray(u, dtype='>d').tobytes())
with open(os.path.join(tmp_path, 'param.bin'), 'wb') as f:
f.write(asarray([10, s, 8./3], dtype='>d').tobytes())
call([solver, str(int(nsteps))], cwd=tmp_path)
with open(os.path.join(tmp_path, 'output.bin'), 'rb') as f:
out = frombuffer(f.read(), dtype='>d')
with open(os.path.join(tmp_path, 'objective.bin'), 'rb') as f:
J = frombuffer(f.read(), dtype='>d')
shutil.rmtree(tmp_path)
return out, J

def adjoint(u, s, ua, nsteps):
tmp_path = tempfile.mkdtemp()
with open(os.path.join(tmp_path, 'input.bin'), 'wb') as f:
f.write(asarray(u, dtype='>d').tobytes())
with open(os.path.join(tmp_path, 'param.bin'), 'wb') as f:
f.write(asarray([10, s, 8./3], dtype='>d').tobytes())
with open(os.path.join(tmp_path, 'adj-input.bin'), 'wb') as f:
f.write(asarray(ua, dtype='>d').tobytes())
call([adj_solver, str(int(nsteps))], cwd=tmp_path)
with open(os.path.join(tmp_path, 'adj-output.bin'), 'rb') as f:
out = frombuffer(f.read(), dtype='>d')
with open(os.path.join(tmp_path, 'dJds.bin'), 'rb') as f:
dJds = frombuffer(f.read(), dtype='>d')
shutil.rmtree(tmp_path)
return out, dJds

if __name__ == '__main__':
m = 2
cp = shadowing(solve, u0, 28, m, 3, 1000, 5000, return_checkpoint=True)

_, _, _, lss, G_lss, g_lss, J, G_dil, g_dil = cp
J = np.array(J)
steps_per_segment = J.shape[1]
dJ = trapez_mean(J.mean(0), 0) - J[:,-1]
assert dJ.ndim == 2 and dJ.shape[1] == 1

win = windowed_mean_weights(dJ.shape[0])
g_lss_adj = win[:,newaxis]
alpha_adj_lss = win[:,newaxis] * np.array(G_lss)[:,:,0]

dil_adj = win * ravel(dJ)
g_dil_adj = dil_adj / steps_per_segment
alpha_adj_dil = dil_adj[:,newaxis] * G_dil / steps_per_segment

alpha_adj = alpha_adj_lss + alpha_adj_dil
b_adj = lss.adjoint(alpha_adj)

# verification
print((g_lss_adj * g_lss).sum() + (b_adj * np.array(lss.bs)).sum() + (g_dil_adj * g_dil).sum())
alpha = lss.solve()
print((g_lss_adj * g_lss).sum() + (alpha_adj * alpha).sum() + (g_dil_adj * g_dil).sum())
grad_lss = (alpha[:,:,np.newaxis] * np.array(G_lss)).sum(1) + np.array(g_lss)
dil = ((alpha * G_dil).sum(1) + g_dil) / steps_per_segment
grad_dil = dil[:,np.newaxis] * dJ
print(windowed_mean(grad_lss) + windowed_mean(grad_dil))
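The three printed quantities are the same derivative assembled three ways: the adjoint contracted with lss.bs, the adjoint contracted with the tangent solution alpha, and the forward (tangent) gradient. If the adjoint is consistent they should agree up to round-off; a sketch of how the prints could be turned into a check (allclose defaults used for illustration):

v1 = (g_lss_adj * g_lss).sum() + (b_adj * np.array(lss.bs)).sum() + (g_dil_adj * g_dil).sum()
v2 = (g_lss_adj * g_lss).sum() + (alpha_adj * alpha).sum() + (g_dil_adj * g_dil).sum()
v3 = (windowed_mean(grad_lss) + windowed_mean(grad_dil)).sum()
print(allclose(v1, v2), allclose(v2, v3))  # both True when the adjoint is consistent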
