In [1]:
# "magic" commands, prefaced with "%", changes settings in the notebook

# this ensures plots are embedded in notebook web page
%matplotlib inline

# pdb = Python debugger, so this command turns the debugger OFF
%pdb off

# numpy = numerical Python, implements arrays (/ matrices)
import numpy as np
# limit number of decimal places printed for floating-point numbers
np.set_printoptions(precision=3)

# scipy = scientific Python, implements operations on arrays / matrices
import scipy as sp
# linalg = linear algebra, implements eigenvalues, matrix inverse, etc
from scipy import linalg as la
# optimize = optimization, root finding, etc
from scipy import optimize as op

# produce matlab-style plots
import matplotlib as mpl
# increase font size on plots
mpl.rc('font',**{'size':18})
# use LaTeX to render symbols
mpl.rc('text',usetex=False)
# animation
from matplotlib import animation as ani
# Matlab-style plotting
import matplotlib.pyplot as plt

# symbolic computation, i.e. computer algebra (like Mathematica, Wolfram Alpha)
import sympy as sym
from sympy import init_printing
from sympy import symbols, diff, Matrix, solve

Automatic pdb calling has been turned OFF


In [2]:
# test whether this is a Colaboratory or Jupyter notebook
try:
  import google.colab
  COLAB = True
  print('Colaboratory Notebook')
except:
  COLAB = False
  print('Jupyter Notebook')

Jupyter Notebook


In [3]:
# Colab notebook
if COLAB:  
  # render SymPy equations nicely in Colaboratory Notebook
  def colab_latex_printer(exp,**options):
    from google.colab.output._publish import javascript
    url = "https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.3/latest.js?config=default"
    javascript(url=url)
    return sym.printing.latex(exp,**options)
  
  sym.init_printing(use_latex="mathjax",latex_printer=colab_latex_printer)

# Jupyter notebook
else:
  init_printing(use_latex='mathjax')

In [4]:
k, w, tau, gam, gam_k, gam_w, lam_k, lam_w, lam, alpha, f, a, b = symbols(r'k w \tau \gamma \gamma_k \gamma_w \lambda_k \lambda_w \lambda \alpha f a b')
k,w,tau,gam,gam_k,gam_w, lam_k, lam_w, lam, alpha, f, a, b

(k, w, \tau, \gamma, \gammaₖ, \gamma_w, \lambdaₖ, \lambda_w, \lambda, \alpha, 
f, a, b)

In [5]:
lam, lam_k, lam_w = sym.symbols(r'\lambda \lambda_k \lambda_w',nonnegative=True)

In [6]:
# e_scl =  (((tau**2)*(1 - k*w)**2) + (sig_w * w**2) + (sig_k*k**2)) / 2

e_scl =   (tau - (k*(w*tau)))**2 + (lam_w * w**2) + (lam_k*k**2)
# e_scl =   (tau - (k*(w*tau + b)))**2 + (sig_w * w**2) + (sig_k*k**2)
# sig_w and sig_k are penalty parameters
# 1/2 term added to make the derivative simpler 

In [7]:
e_scl

          2              2                     2
\lambdaₖ⋅k  + \lambda_w⋅w  + (-\tau⋅k⋅w + \tau) 

In [8]:
# First Derivative
de_dk = diff(e_scl,k)
de_dw = diff(e_scl, w)

# Second Derivative
de2_d2k = diff(de_dk, k)
de2_dwdk = diff(de_dk, w)
de2_d2w = diff(de_dw, w)
de2_dkdw = diff(de_dw, k)

In [9]:
de2 = [[de2_d2k, de2_dwdk],[de2_dkdw, de2_d2w]]
de2


⎡⎡                   2  2        2                                ⎤  ⎡      2 
⎣⎣2⋅\lambdaₖ + 2⋅\tau ⋅w , 2⋅\tau ⋅k⋅w - 2⋅\tau⋅(-\tau⋅k⋅w + \tau)⎦, ⎣2⋅\tau ⋅

                                                     2  2⎤⎤
k⋅w - 2⋅\tau⋅(-\tau⋅k⋅w + \tau), 2⋅\lambda_w + 2⋅\tau ⋅k ⎦⎦

In [10]:
de = [de_dk, de_dw]
de
# non-linear dynamics -- b/c of w^2 and k^2 
# w/k are evolving continuously in time based on gradient

[2⋅\lambdaₖ⋅k - 2⋅\tau⋅w⋅(-\tau⋅k⋅w + \tau), 2⋅\lambda_w⋅w - 2⋅\tau⋅k⋅(-\tau⋅k
⋅w + \tau)]

Continuous-time gradient descent has dynamics:

$$ \dot{x} = - Dp(x) = f(x), $$

So the Jacobian of the dynamics is determined by the Hessian of error $e$:

$$ Df(x) = - D^2 p(x). $$

At a minimum $x_0$, the eigenvalues of $D^2 e(x_0)$ are positive, so the eigenvalues of $Df(x_0)$ are negative.

Note: The Hessian matrix of a function f is the Jacobian matrix of the gradient of the function: H(f(x)) = J(∇f(x)). (https://en.wikipedia.org/wiki/Hessian_matrix)



In [11]:
Gamma = sym.Matrix.diag([1,1])
Gamma

⎡1  0⎤
⎢    ⎥
⎣0  1⎦

In [12]:
# sets f(x) = -De(x)
f = -Gamma*sym.Matrix(de)
f

⎡-2⋅\lambdaₖ⋅k + 2⋅\tau⋅w⋅(-\tau⋅k⋅w + \tau) ⎤
⎢                                            ⎥
⎣-2⋅\lambda_w⋅w + 2⋅\tau⋅k⋅(-\tau⋅k⋅w + \tau)⎦

In [13]:
# substitutes tau = 1, k_sig = w_sig = sig

subs = {lam_k:lam, lam_w:lam}

Step 1: Solving for fixed points ($\bar{x}$ such that $\frac{d}{dt}$ evaluated at $\bar{x}$ = 0, or x so that the first derivative of $\frac{df(x)}{dt}$ = 0)

In [14]:
# stationary points 
# these stationary points lead to points where e(k_0, w_0) is a minimum, maximum or saddle point
# Note: without the penalty terms in the cost function, (0,0) becomes a saddle point as a stationary point
sol = solve(de,[k,w]) # <-- find [k_0,w_0] that makes de(k_0,w_0) == [0,0]
sol

⎡        ⎛                                                                 ___
⎢        ⎜           ⎛                 ⎛     __________           ⎞⎞      ╱   
⎢        ⎜           ⎜               2 ⎜   ╲╱ \lambdaₖ    \lambdaₖ⎟⎟     ╱    
⎢        ⎜-\lambda_w⋅⎜\lambdaₖ + \tau ⋅⎜- ───────────── - ────────⎟⎟⋅   ╱   - 
⎢        ⎜           ⎜                 ⎜    ___________        2  ⎟⎟   ╱      
⎢        ⎜           ⎝                 ⎝  ╲╱ \lambda_w     \tau   ⎠⎠ ╲╱       
⎢(0, 0), ⎜────────────────────────────────────────────────────────────────────
⎢        ⎜                                                     2              
⎣        ⎝                                        \lambdaₖ⋅\tau               

_________________________                                      ⎞  ⎛           
   __________                                                  ⎟  ⎜          ⎛
 ╲╱ \lambdaₖ    \lambdaₖ                                       ⎟  ⎜          ⎜
───────────── - ────────           ________________


Here we're just looking at particular substitutions of the fixed point


In [15]:
# substitutes subs for the fixed points
# 2 are imaginary -- ignored
# other 2 are real if the sigma < 1 -- sigma can't be too large
# if sigma is too large, going back to (0, 0)
[sym.simplify(sym.Matrix(_).subs(subs)) for _ in sol]

⎡     ⎡     _______________ ⎤  ⎡      _______________⎤  ⎡      _______________
⎢     ⎢    ╱   \lambda      ⎥  ⎢     ╱   \lambda     ⎥  ⎢     ╱   \lambda     
⎢     ⎢   ╱  - ─────── - 1  ⎥  ⎢-   ╱  - ─────── - 1 ⎥  ⎢-   ╱  - ─────── + 1 
⎢     ⎢  ╱          2       ⎥  ⎢   ╱          2      ⎥  ⎢   ╱          2      
⎢⎡0⎤  ⎢╲╱       \tau        ⎥  ⎢ ╲╱       \tau       ⎥  ⎢ ╲╱       \tau       
⎢⎢ ⎥, ⎢                     ⎥, ⎢                     ⎥, ⎢                     
⎢⎣0⎦  ⎢      _______________⎥  ⎢     _______________ ⎥  ⎢      _______________
⎢     ⎢     ╱   \lambda     ⎥  ⎢    ╱   \lambda      ⎥  ⎢     ╱   \lambda     
⎢     ⎢-   ╱  - ─────── - 1 ⎥  ⎢   ╱  - ─────── - 1  ⎥  ⎢-   ╱  - ─────── + 1 
⎢     ⎢   ╱          2      ⎥  ⎢  ╱          2       ⎥  ⎢   ╱          2      
⎣     ⎣ ╲╱       \tau       ⎦  ⎣╲╱       \tau        ⎦  ⎣ ╲╱       \tau       

⎤  ⎡     _______________⎤⎤
⎥  ⎢    ╱   \lambda     ⎥⎥
⎥  ⎢   ╱  - ─────── + 1 ⎥⎥
⎥  ⎢  ╱          2      ⎥⎥
⎥  ⎢╲╱       \tau     

In [16]:
subs_sim = {lam:sym.Rational(1,2), tau:1, a: 0}
[sym.simplify(sym.Matrix(_).subs(subs).subs(subs_sim)) for _ in sol]

⎡     ⎡ √6⋅ⅈ ⎤  ⎡-√6⋅ⅈ ⎤  ⎡-√2 ⎤  ⎡√2⎤⎤
⎢     ⎢ ──── ⎥  ⎢──────⎥  ⎢────⎥  ⎢──⎥⎥
⎢⎡0⎤  ⎢  2   ⎥  ⎢  2   ⎥  ⎢ 2  ⎥  ⎢2 ⎥⎥
⎢⎢ ⎥, ⎢      ⎥, ⎢      ⎥, ⎢    ⎥, ⎢  ⎥⎥
⎢⎣0⎦  ⎢-√6⋅ⅈ ⎥  ⎢ √6⋅ⅈ ⎥  ⎢-√2 ⎥  ⎢√2⎥⎥
⎢     ⎢──────⎥  ⎢ ──── ⎥  ⎢────⎥  ⎢──⎥⎥
⎣     ⎣  2   ⎦  ⎣  2   ⎦  ⎣ 2  ⎦  ⎣2 ⎦⎦

In [17]:
# Separating the substitutions -- this is if tau = 1 only 
[sym.simplify(sym.expand(sym.simplify(sym.Matrix(_).subs({tau:1, lam_k:lam, lam_w:lam})))) for _ in sol]

⎡     ⎡   ______________ ⎤  ⎡     _____________⎤  ⎡   _____________⎤  ⎡  _____
⎢⎡0⎤  ⎢ ╲╱ -\lambda - 1  ⎥  ⎢-ⅈ⋅╲╱ \lambda + 1 ⎥  ⎢-╲╱ 1 - \lambda ⎥  ⎢╲╱ 1 - 
⎢⎢ ⎥, ⎢                  ⎥, ⎢                  ⎥, ⎢                ⎥, ⎢       
⎢⎣0⎦  ⎢     _____________⎥  ⎢   ______________ ⎥  ⎢   _____________⎥  ⎢  _____
⎣     ⎣-ⅈ⋅╲╱ \lambda + 1 ⎦  ⎣ ╲╱ -\lambda - 1  ⎦  ⎣-╲╱ 1 - \lambda ⎦  ⎣╲╱ 1 - 

________⎤⎤
\lambda ⎥⎥
        ⎥⎥
________⎥⎥
\lambda ⎦⎦

In [18]:
# expands de with the substitutions above
sym.expand(sym.simplify(sym.Matrix(de).subs(subs)))

⎡                    2    2         2  ⎤
⎢2⋅\lambda⋅k + 2⋅\tau ⋅k⋅w  - 2⋅\tau ⋅w⎥
⎢                                      ⎥
⎢                    2  2           2  ⎥
⎣2⋅\lambda⋅w + 2⋅\tau ⋅k ⋅w - 2⋅\tau ⋅k⎦

In [19]:
k0,w0 = sym.simplify(sym.Matrix(sol[4]))
x0 = {k:k0,w:w0}
x0

⎧                       _______________________________________               
⎪                      ╱                           ___________                
⎪   4 ___________     ╱    __________   \lambdaₖ⋅╲╱ \lambda_w                 
⎪   ╲╱ \lambda_w ⋅   ╱   ╲╱ \lambdaₖ  - ──────────────────────            ____
⎨                   ╱                               2                    ╱    
⎪                 ╲╱                            \tau                    ╱   ╲╱
⎪k: ───────────────────────────────────────────────────────────, w:    ╱   ───
⎪                             __________                              ╱      _
⎩                           ╲╱ \lambdaₖ                             ╲╱     ╲╱ 

                      ⎫
                      ⎪
                      ⎪
______________________⎪
__________            ⎬
 \lambdaₖ    \lambdaₖ ⎪
────────── - ──────── ⎪
__________        2   ⎪
\lambda_w     \tau    ⎭

In [20]:
# Looking at one of the solved fixed points
k0,w0 = sym.simplify(sym.Matrix(sol[4]).subs(subs).subs({a:0}))
x0 = {k:k0,w:w0}
x0
# as sigma --> tau^s, goes to 0
# as sigma --> tau^2, all 3 equilibria coincide

⎧        _______________          _______________⎫
⎪       ╱   \lambda              ╱   \lambda     ⎪
⎨k:    ╱  - ─────── + 1 , w:    ╱  - ─────── + 1 ⎬
⎪     ╱          2             ╱          2      ⎪
⎩   ╲╱       \tau            ╲╱       \tau       ⎭

In [21]:
k0_s,w0_s = sym.simplify(sym.Matrix(sol[4]).subs(subs).subs({lam:sym.Rational(1,2), tau:sym.Rational(1), a: 0}))
x0_s = {k:k0_s,w:w0_s}
x0_s

⎧   √2     √2⎫
⎨k: ──, w: ──⎬
⎩   2      2 ⎭

Step 2: Find the Jacobian evaluated at the fixed points


In [22]:
H = sym.Matrix(de).jacobian([k,w])
J = f.jacobian([k,w])

In [23]:
f

⎡-2⋅\lambdaₖ⋅k + 2⋅\tau⋅w⋅(-\tau⋅k⋅w + \tau) ⎤
⎢                                            ⎥
⎣-2⋅\lambda_w⋅w + 2⋅\tau⋅k⋅(-\tau⋅k⋅w + \tau)⎦

In [24]:
J

⎡                            2  2                   2                         
⎢        -2⋅\lambdaₖ - 2⋅\tau ⋅w            - 2⋅\tau ⋅k⋅w + 2⋅\tau⋅(-\tau⋅k⋅w 
⎢                                                                             
⎢        2                                                               2  2 
⎣- 2⋅\tau ⋅k⋅w + 2⋅\tau⋅(-\tau⋅k⋅w + \tau)          -2⋅\lambda_w - 2⋅\tau ⋅k  

       ⎤
+ \tau)⎥
       ⎥
       ⎥
       ⎦

In [25]:
de2

⎡⎡                   2  2        2                                ⎤  ⎡      2 
⎣⎣2⋅\lambdaₖ + 2⋅\tau ⋅w , 2⋅\tau ⋅k⋅w - 2⋅\tau⋅(-\tau⋅k⋅w + \tau)⎦, ⎣2⋅\tau ⋅

                                                     2  2⎤⎤
k⋅w - 2⋅\tau⋅(-\tau⋅k⋅w + \tau), 2⋅\lambda_w + 2⋅\tau ⋅k ⎦⎦

In [26]:
J.T

⎡                            2  2                   2                         
⎢        -2⋅\lambdaₖ - 2⋅\tau ⋅w            - 2⋅\tau ⋅k⋅w + 2⋅\tau⋅(-\tau⋅k⋅w 
⎢                                                                             
⎢        2                                                               2  2 
⎣- 2⋅\tau ⋅k⋅w + 2⋅\tau⋅(-\tau⋅k⋅w + \tau)          -2⋅\lambda_w - 2⋅\tau ⋅k  

       ⎤
+ \tau)⎥
       ⎥
       ⎥
       ⎦

In [27]:
# Is this symmetric? 
# Symmetric = potential game
# symmetric part of J: closely related to stability properties 
J.T - J

⎡0  0⎤
⎢    ⎥
⎣0  0⎦

In [28]:
# all eigenvals have to have a real component for linearization, no eigenvals can be purely imaginary
# Hartmann-Gromann theorem
J.eigenvals()

⎧                                                 ____________________________
⎨                            2  2       2  2     ╱         2                  
⎩-\lambdaₖ - \lambda_w - \tau ⋅k  - \tau ⋅w  - ╲╱  \lambdaₖ  - 2⋅\lambdaₖ⋅\lam

______________________________________________________________________________
                       2  2                  2  2            2                
bda_w - 2⋅\lambdaₖ⋅\tau ⋅k  + 2⋅\lambdaₖ⋅\tau ⋅w  + \lambda_w  + 2⋅\lambda_w⋅\

______________________________________________________________________________
   2  2                   2  2       4  4          4  2  2          4         
tau ⋅k  - 2⋅\lambda_w⋅\tau ⋅w  + \tau ⋅k  + 14⋅\tau ⋅k ⋅w  - 16⋅\tau ⋅k⋅w + \t

_________________                                                      _______
  4  4         4                                  2  2       2  2     ╱       
au ⋅w  + 4⋅\tau  : 1, -\lambdaₖ - \lambda_w - \tau ⋅k  - \tau ⋅w  + ╲╱  \lambd

________________________________________________

In [29]:
# linearizing around fixed-point
H0 = H.subs(subs).subs(x0)
J0 = J.subs(subs).subs(x0)

In [30]:
J0

⎡                                  2 ⎛  \lambda    ⎞                         2
⎢               -2⋅\lambda - 2⋅\tau ⋅⎜- ─────── + 1⎟                 - 2⋅\tau 
⎢                                    ⎜       2     ⎟                          
⎢                                    ⎝   \tau      ⎠                          
⎢                                                                             
⎢        2 ⎛  \lambda    ⎞          ⎛       ⎛  \lambda    ⎞       ⎞           
⎢- 2⋅\tau ⋅⎜- ─────── + 1⎟ + 2⋅\tau⋅⎜- \tau⋅⎜- ─────── + 1⎟ + \tau⎟           
⎢          ⎜       2     ⎟          ⎜       ⎜       2     ⎟       ⎟           
⎣          ⎝   \tau      ⎠          ⎝       ⎝   \tau      ⎠       ⎠           

 ⎛  \lambda    ⎞          ⎛       ⎛  \lambda    ⎞       ⎞⎤
⋅⎜- ─────── + 1⎟ + 2⋅\tau⋅⎜- \tau⋅⎜- ─────── + 1⎟ + \tau⎟⎥
 ⎜       2     ⎟          ⎜       ⎜       2     ⎟       ⎟⎥
 ⎝   \tau      ⎠          ⎝       ⎝   \tau      ⎠       ⎠⎥
                                                    

In [31]:
J0.subs({a:0}).eigenvals()
# both in eigenvalues and fixed points -- underscore that this is also in stationary points
# only meaningful for sigma < tau^2
# for sigma > tau^2 --> linearize around 0, 0
# would have a different expression for 0, 0
# bifurcation -- when the stationary points approach the 0, 0 point --> pitchfork bifurcation

⎧                                 2   ⎫
⎨-4⋅\lambda: 1, 4⋅\lambda - 4⋅\tau : 1⎬
⎩                                     ⎭

In [32]:
# J0.subs({sig:sym.Rational(1,2)}).eigenvals()

In [33]:
# gives us the bounds of sigma 
# possibly: big tau makes the game easier (larger reaching task is easier)

#lambda_bar = max eigenvalue, upper bound rate of convergence in the neighborhood of fixed point

H0.eigenvals()

⎧                                 2   ⎫
⎨4⋅\lambda: 1, -4⋅\lambda + 4⋅\tau : 1⎬
⎩                                     ⎭

When it comes to $J = Df(x_0)$, we care about whether **all eigenvalues have negative real part**,

$$ \forall \lambda \in \operatorname{spec} Df(x_0) : \operatorname{Real}\lambda < 0, $$

because we have the bound 

$$ \| x(t) - x_0 \| \leq e^{\overline{\lambda} t} \| x(0) - x_0 \| $$

where 

$$ \overline{\lambda} = \max\{\operatorname{Real}\lambda : \lambda\in \operatorname{spec} Df(x_0)\}. $$

Continuous-time gradient descent has dynamics:

$$ \dot{x} = - De(x) = f(x), $$

So on time horizon $\Delta > 0$, we have approximately

$$ x(t + \Delta) - x(t) \approx \Delta f(x(t)). $$

Discrete-time gradient descent has dynamics:

$$ x^+ = x - \Gamma De(x) = x + \Gamma f(x) = F(x), $$

Note that if $x_0$ is stationary:

$$ De(x_0) = 0 \implies f(x_0) = 0 \implies \dot{x}_0 = 0 $$

$$ De(x_0) = 0 \implies F(x_0) = x_0 $$

In [34]:
Gamma = sym.Matrix.diag([gam_k,gam_w])
Gamma

⎡\gammaₖ     0    ⎤
⎢                 ⎥
⎣   0     \gamma_w⎦

In [35]:
# Recall: f = -1*de = -1*[de_dk, de_dw]
f

⎡-2⋅\lambdaₖ⋅k + 2⋅\tau⋅w⋅(-\tau⋅k⋅w + \tau) ⎤
⎢                                            ⎥
⎣-2⋅\lambda_w⋅w + 2⋅\tau⋅k⋅(-\tau⋅k⋅w + \tau)⎦

In [36]:
# Set F as the matrix to describe the update to k+, w+
F = sym.Matrix([k,w]) + Gamma * f
F

⎡ \gammaₖ⋅(-2⋅\lambdaₖ⋅k + 2⋅\tau⋅w⋅(-\tau⋅k⋅w + \tau)) + k ⎤
⎢                                                           ⎥
⎣\gamma_w⋅(-2⋅\lambda_w⋅w + 2⋅\tau⋅k⋅(-\tau⋅k⋅w + \tau)) + w⎦

In [37]:
# # Set F as the matrix to describe the update to k+, w+
# F = sym.Matrix([k,w]) + Gamma * f.subs(subs)
# F

In [38]:
# Discrete time, derivative of F
DF = F.jacobian([k,w])
DF

⎡               ⎛                    2  2⎞                     ⎛        2     
⎢       \gammaₖ⋅⎝-2⋅\lambdaₖ - 2⋅\tau ⋅w ⎠ + 1         \gammaₖ⋅⎝- 2⋅\tau ⋅k⋅w 
⎢                                                                             
⎢         ⎛        2                                ⎞                ⎛        
⎣\gamma_w⋅⎝- 2⋅\tau ⋅k⋅w + 2⋅\tau⋅(-\tau⋅k⋅w + \tau)⎠       \gamma_w⋅⎝-2⋅\lamb

                           ⎞⎤
+ 2⋅\tau⋅(-\tau⋅k⋅w + \tau)⎠⎥
                            ⎥
             2  2⎞          ⎥
da_w - 2⋅\tau ⋅k ⎠ + 1      ⎦

In [39]:
# x0 = {k:k0,w:w0}
DF.subs(x0).subs({lam_k:lam, lam_w:lam})

⎡                     ⎛                   2 ⎛  \lambda    ⎞⎞                  
⎢             \gammaₖ⋅⎜-2⋅\lambda - 2⋅\tau ⋅⎜- ─────── + 1⎟⎟ + 1              
⎢                     ⎜                     ⎜       2     ⎟⎟                  
⎢                     ⎝                     ⎝   \tau      ⎠⎠                  
⎢                                                                             
⎢         ⎛        2 ⎛  \lambda    ⎞          ⎛       ⎛  \lambda    ⎞       ⎞⎞
⎢\gamma_w⋅⎜- 2⋅\tau ⋅⎜- ─────── + 1⎟ + 2⋅\tau⋅⎜- \tau⋅⎜- ─────── + 1⎟ + \tau⎟⎟
⎢         ⎜          ⎜       2     ⎟          ⎜       ⎜       2     ⎟       ⎟⎟
⎣         ⎝          ⎝   \tau      ⎠          ⎝       ⎝   \tau      ⎠       ⎠⎠

          ⎛        2 ⎛  \lambda    ⎞          ⎛       ⎛  \lambda    ⎞       ⎞⎞
  \gammaₖ⋅⎜- 2⋅\tau ⋅⎜- ─────── + 1⎟ + 2⋅\tau⋅⎜- \tau⋅⎜- ─────── + 1⎟ + \tau⎟⎟
          ⎜          ⎜       2     ⎟          ⎜       ⎜       2     ⎟       ⎟⎟
          ⎝          ⎝   \tau      ⎠          ⎝    

In [40]:
# Sub in the stationary point for the eignenvalues
# Take Jacobian of F, find eigenvalues and evaluate at the fixed point
DF.eigenvals()

⎧                                                                             
⎨                                2  2                                     2  2
⎩-\gammaₖ⋅\lambdaₖ - \gammaₖ⋅\tau ⋅w  - \gamma_w⋅\lambda_w - \gamma_w⋅\tau ⋅k 

      ________________________________________________________________________
     ╱        2         2            2              2  2          2     4  4  
 - ╲╱  \gammaₖ ⋅\lambdaₖ  + 2⋅\gammaₖ ⋅\lambdaₖ⋅\tau ⋅w  + \gammaₖ ⋅\tau ⋅w  -

______________________________________________________________________________
                                                                         2  2 
 2⋅\gammaₖ⋅\gamma_w⋅\lambdaₖ⋅\lambda_w - 2⋅\gammaₖ⋅\gamma_w⋅\lambdaₖ⋅\tau ⋅k  

______________________________________________________________________________
                                   2  2                           4  2  2     
- 2⋅\gammaₖ⋅\gamma_w⋅\lambda_w⋅\tau ⋅w  + 14⋅\gammaₖ⋅\gamma_w⋅\tau ⋅k ⋅w  - 16

________________________________________________

In [41]:
DF.subs(x0).subs({lam_k:lam, lam_w:lam}).eigenvals()

⎧                                      _______________________________________
⎨              2                2     ╱        2     4                        
⎩- \gammaₖ⋅\tau  - \gamma_w⋅\tau  - ╲╱  \gammaₖ ⋅\tau  + 16⋅\gammaₖ⋅\gamma_w⋅\

______________________________________________________________________________
      2                                   2                          4        
lambda  - 16⋅\gammaₖ⋅\gamma_w⋅\lambda⋅\tau  + 2⋅\gammaₖ⋅\gamma_w⋅\tau  + \gamm

___________                                               ____________________
   2     4                        2                2     ╱        2     4     
a_w ⋅\tau   + 1: 1, - \gammaₖ⋅\tau  - \gamma_w⋅\tau  + ╲╱  \gammaₖ ⋅\tau  + 16

______________________________________________________________________________
                         2                                   2                
⋅\gammaₖ⋅\gamma_w⋅\lambda  - 16⋅\gammaₖ⋅\gamma_w⋅\lambda⋅\tau  + 2⋅\gammaₖ⋅\ga

______________________________       ⎫
         

In [42]:
DF.subs(x0).subs({lam_k:lam, lam_w:lam}).subs({tau:1}).eigenvals()

⎧                         ____________________________________________________
⎨                        ╱        2                              2            
⎩-\gammaₖ - \gamma_w - ╲╱  \gammaₖ  + 16⋅\gammaₖ⋅\gamma_w⋅\lambda  - 16⋅\gamma

____________________________________________________                          
                                                  2                           
ₖ⋅\gamma_w⋅\lambda + 2⋅\gammaₖ⋅\gamma_w + \gamma_w   + 1: 1, -\gammaₖ - \gamma

        ______________________________________________________________________
       ╱        2                              2                              
_w + ╲╱  \gammaₖ  + 16⋅\gammaₖ⋅\gamma_w⋅\lambda  - 16⋅\gammaₖ⋅\gamma_w⋅\lambda

__________________________________       ⎫
                                2        ⎬
 + 2⋅\gammaₖ⋅\gamma_w + \gamma_w   + 1: 1⎭

In [43]:
DF.subs(x0).subs({lam_k:lam, lam_w:lam}).subs({tau:1}).subs({lam:sym.Rational(1,2)}).eigenvals()

{1 - 2⋅\gammaₖ: 1, 1 - 2⋅\gamma_w: 1}

When it comes to $DF(x_0)$, we care about whether **all** eigenvalues have magnitude smaller than $1$,

$$ \forall \lambda \in \operatorname{spec} DF(x_0) : |\lambda| < 1, $$

because we have the bound 

$$ \| x(k) - x_0 \| \leq \widetilde{\lambda}^k \| x(0) - x_0 \| $$

where 

$$ \widetilde{\lambda} = \max\{|\lambda| : \lambda\in \operatorname{spec} DF(x_0)\}. $$

In [44]:
#x0 = k0, w0
DF.subs({lam_k:lam, lam_w:lam}).subs({a:0, tau:1}).eigenvals()

⎧                                                                    _________
⎨                            2                                2     ╱        2
⎩-\gammaₖ⋅\lambda - \gammaₖ⋅w  - \gamma_w⋅\lambda - \gamma_w⋅k  - ╲╱  \gammaₖ 

______________________________________________________________________________
        2            2          2          2  4                             2 
⋅\lambda  + 2⋅\gammaₖ ⋅\lambda⋅w  + \gammaₖ ⋅w  - 2⋅\gammaₖ⋅\gamma_w⋅\lambda  

______________________________________________________________________________
                              2                               2               
- 2⋅\gammaₖ⋅\gamma_w⋅\lambda⋅k  - 2⋅\gammaₖ⋅\gamma_w⋅\lambda⋅w  + 14⋅\gammaₖ⋅\

______________________________________________________________________________
         2  2                                                          2      
gamma_w⋅k ⋅w  - 16⋅\gammaₖ⋅\gamma_w⋅k⋅w + 4⋅\gammaₖ⋅\gamma_w + \gamma_w ⋅\lamb

____________________________________________    

In [45]:
# iterative gradient descent converges?
# Two-Learner paper connection: learning rates can't be too large or small
# DF.subs(x0).subs({a:0, tau:1, lam:sym.Rational(1,2)}).eigenvals()
DF.subs(x0).subs({a:0, tau:1, lam_k:lam, lam_w:lam}).eigenvals()

⎧                         ____________________________________________________
⎨                        ╱        2                              2            
⎩-\gammaₖ - \gamma_w - ╲╱  \gammaₖ  + 16⋅\gammaₖ⋅\gamma_w⋅\lambda  - 16⋅\gamma

____________________________________________________                          
                                                  2                           
ₖ⋅\gamma_w⋅\lambda + 2⋅\gammaₖ⋅\gamma_w + \gamma_w   + 1: 1, -\gammaₖ - \gamma

        ______________________________________________________________________
       ╱        2                              2                              
_w + ╲╱  \gammaₖ  + 16⋅\gammaₖ⋅\gamma_w⋅\lambda  - 16⋅\gammaₖ⋅\gamma_w⋅\lambda

__________________________________       ⎫
                                2        ⎬
 + 2⋅\gammaₖ⋅\gamma_w + \gamma_w   + 1: 1⎭

In [46]:
# # substitution of sigma = 1/2

# DF.subs({a:0, tau:1, lam_k:lam, lam_w:lam, lam:sym.Rational(1,2)}).eigenvals()
DF.subs(x0).subs({a:0, tau:1, lam_k: lam, lam_w:lam}). subs({lam:sym.Rational(1,2)}).eigenvals()

{1 - 2⋅\gammaₖ: 1, 1 - 2⋅\gamma_w: 1}

In [47]:
DF.subs(x0).subs({lam:sym.Rational(1,2), gam_k:sym.Rational(6,1000), gam_w:sym.Rational(6,1000), tau:1, a:0}).eigenvals()


⎧497   3⋅\lambdaₖ     497   3⋅\lambda_w   ⎫
⎨─── - ──────────: 1, ─── - ───────────: 1⎬
⎩500      250         500       250       ⎭

In [48]:
DF.subs(x0).subs({a:0, tau:1}).eigenvals()

⎧                                                                             
⎨                                                                             
⎩\gammaₖ⋅\lambda - \gammaₖ⋅\lambdaₖ - \gammaₖ + \gamma_w⋅\lambda - \gamma_w⋅\l

                        ______________________________________________________
                       ╱        2        2            2                       
ambda_w - \gamma_w - ╲╱  \gammaₖ ⋅\lambda  - 2⋅\gammaₖ ⋅\lambda⋅\lambdaₖ - 2⋅\

______________________________________________________________________________
      2                  2         2            2                   2         
gammaₖ ⋅\lambda + \gammaₖ ⋅\lambdaₖ  + 2⋅\gammaₖ ⋅\lambdaₖ + \gammaₖ  + 14⋅\ga

______________________________________________________________________________
                     2                                                        
mmaₖ⋅\gamma_w⋅\lambda  + 2⋅\gammaₖ⋅\gamma_w⋅\lambda⋅\lambdaₖ + 2⋅\gammaₖ⋅\gamm

________________________________________________

In [49]:
DF.subs(x0).subs({a:0}).eigenvals()

⎧                                                                             
⎨                                                 2                           
⎩\gammaₖ⋅\lambda - \gammaₖ⋅\lambdaₖ - \gammaₖ⋅\tau  + \gamma_w⋅\lambda - \gamm

                                    __________________________________________
                             2     ╱        2        2            2           
a_w⋅\lambda_w - \gamma_w⋅\tau  - ╲╱  \gammaₖ ⋅\lambda  - 2⋅\gammaₖ ⋅\lambda⋅\l

______________________________________________________________________________
                  2             2          2         2            2           
ambdaₖ - 2⋅\gammaₖ ⋅\lambda⋅\tau  + \gammaₖ ⋅\lambdaₖ  + 2⋅\gammaₖ ⋅\lambdaₖ⋅\

______________________________________________________________________________
   2          2     4                              2                          
tau  + \gammaₖ ⋅\tau  + 14⋅\gammaₖ⋅\gamma_w⋅\lambda  + 2⋅\gammaₖ⋅\gamma_w⋅\lam

________________________________________________

In [50]:
# As in the block of text above, the eigenvalue that has the larger value is the one to pay attention to
# Take the log of ||KW - K_oW_o|| closer to the stationary point 
# Compare that with the t*log(eigenvalue) to get the convergence rate
DF.subs(x0).subs({lam:sym.Rational(1,2), tau:1}).eigenvals()

# eigenvalues have to be within the unit circle when sigma = 1/2

{\gammaₖ⋅(-2⋅\lambdaₖ - 1) + 1: 1, \gamma_w⋅(-2⋅\lambda_w - 1) + 1: 1}

In [51]:
# F_num = sym.lambdify([k,w],F.subs({sig:sym.Rational(1,2),gam_k:2.001,gam_w:.9}))
# x = [np.random.randn(2)]
# for i in range(100):
#   x.append(F_num(*x[-1]).flatten())
# x = np.asarray(x)
# plt.plot(x)

# ^^^ Sam messed this up

###Trying above as a vector

In [52]:
from sympy import Identity,MatrixSymbol
from sympy.abc import i, j, k, l, N, t
from sympy.solvers.solvers import solve_linear
W = MatrixSymbol("W", N, 1)
K = MatrixSymbol("K", N, 1)
I1 = Identity(1)

In [53]:
cv = (tau*I1 - K.T*W*tau)**2 + sig_k*K.T*K + sig_w*W.T*W
cv

NameError: name 'sig_k' is not defined

In [None]:
dcv_dK = -2*(tau**2)*(I1 - K.T*W)*W.T + 2*sig_k*K.T
dcv_dW = -2*(tau**2)*(I1 - K.T*W)*K.T + 2*sig_w*W.T

In [None]:
dcv = [dcv_dK, dcv_dW]
dcv

In [None]:
sol_v = solve(dcv, [K,W]) # <-- find [k_0,w_0] that makes de(k_0,w_0) == [0,0]
sol_v

In [None]:
subs = {sig_k:sig,sig_w:sig}


In [None]:
# dcv_dK = dcv_dK.subs(subs)
# dcv_dW = dcv_dW.subs(subs)

In [None]:
dcv.subs(subs)

In [None]:
sol = solve(dcv_dK,[k,w]) # <-- find [k_0,w_0] that makes de(k_0,w_0) == [0,0]
sol

can tell that K and W are co-linear at the stationary points, so can say W = alpha*K and can assume that sigma_k = sigma_w

In [None]:
subs2 = {W:alpha*K}
dcv_dK = dcv_dK.subs(subs).subs(subs2)
dcv_dW = dcv_dW.subs(subs).subs(subs2)

In [None]:
[dcv_dK, dcv_dW]

Setting both derivatives to 0 to find stationary points

In [None]:
dcv_dK 
sym.solve(dcv_dK, alpha)

### Scratch Work

In [None]:
[[de2_d2k, de2_dwdk], [de2_dkdw, de2_d2w]]

In [None]:
# Jacobian Matrix
Jcb_e_scl = Matrix([de_dk, de_dw]).jacobian([k,w])
Jcb_e_scl

In [None]:
# Evals of jacobian
Jcb_e_scl.eigenvals()

In [None]:
Jcb_e_scl.eigenvals().keys()

In [None]:
sym.solve(diff(e_scl,k),k)

In [None]:
Matrix([[k, w],[-w, k]]).eigenvals()
Matrix([[0, 1],[-2, 3]]).eigenvals()

In [None]:
sol = sym.solve([diff(e_scl,k),diff(e_scl,w)],[k,w])
sol

In [None]:
sol[k]

In [None]:
e_scl.subs(sol)

In [None]:
e_scl.subs({k:sol[k]})

In [None]:
e_scl.subs({sig_k:sig,sig_w:sig,tau:1})

In [None]:
sym.diff?

In [None]:
import numpy as np
import matplotlib.pyplot as plt

def g_w(w,k):
  return w - 0.001*w - 0.1*w*k
def g_k(w,k):
  return k - 0.01*k +0.1*w*k

w0 = 1.
k0 = .5
max_iter = int(1E3)

w_ = np.zeros(max_iter) 
k_ = np.zeros(max_iter) 
w_[0] = w0
k_[0] = k0
for t in range(max_iter-1):
  k_[t+1] = g_k(w_[t],k_[t])
  w_[t+1] = g_w(w_[t],k_[t])

plt.plot(w_)
plt.plot(k_)
