# Rosenbrock function optimization

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.cm as colormaps

from typing import Callable

Numerical gradient

In [None]:
def grad(F: Callable, x: np.array, h=0.001):
    """
    Estimates the gradient of a scalar function with central
    finite differences
    :param F: function taking a 1-D array and returning a scalar
    :param x: point of evaluation; any 1-D array-like (numpy array,
              list or tuple) is accepted
    :param h: half-width of the finite-difference stencil
    :return: float array with one partial derivative estimate
             per component of x
    """
    # Normalise the input so that plain sequences work as well as arrays
    # and integer input is never perturbed in integer arithmetic.
    point = np.asarray(x, dtype=float)
    n = len(point)
    partials = np.zeros(point.shape)

    for i in range(n):
        # Offset of size h along the i-th coordinate only,
        # all other coordinates stay untouched
        offset = np.zeros(n)
        offset[i] = h

        # Central difference: (F(x + h*e_i) - F(x - h*e_i)) / 2h
        partials[i] = (F(point + offset) - F(point - offset)) / (2.0 * h)

    return partials

3D surface plot

In [None]:
def plot_grid(F: Callable[[np.array, np.array], np.array],
              X: np.array, Y: np.array,
              elev=30, azim=50, ax=None):
    """
    Plots 3D surface grid for 2 independent
    parameters and estimation equation
    :param F: estimation equation, called with the two meshgrid arrays
    :param X: first independent parameter
    :param Y: second independent parameter
    :param elev: vertical rotation angle, applied only when a new
                 axis is created
    :param azim: horizontal rotation angle, applied only when a new
                 axis is created
    :param ax: predefined plotting axis; when omitted a new figure
               with a 3D axis is created
    :return: plotting axis the surface was drawn on
    """

    # Generating grid
    x, y = np.meshgrid(X, Y)

    if ax is None:
        # No axis supplied: build a fresh figure with a 3D axis.
        # add_subplot is used because Figure.gca() no longer accepts
        # a projection argument in current Matplotlib releases.
        fig = plt.figure()
        ax = fig.add_subplot(projection='3d')
        ax.view_init(elev=elev, azim=azim)
    else:
        # Reuse the figure that owns the supplied axis so that the
        # colorbar below has a figure to attach to.
        fig = ax.figure

    # Plotting grid
    surf = ax.plot_surface(x, y, F(x, y),
                           cmap=colormaps.coolwarm,
                           antialiased=True)
    fig.colorbar(surf, ax=ax)

    # For axis reusability purposes
    return ax

The following code demonstrates the optimization problem for the **Rosenbrock function**:

$F(x, y) = (1 - x)^{2} + 100(y - x^{2})^{2}$

The function has a single minimum at $(x, y) = (1, 1)$, where $F(x, y) = 0$.

The gradient is, by definition, a vector that points in the direction of the greatest increase of the function. To reach a local minimum with an iterative algorithm, each step should therefore be taken in the direction opposite to the gradient. The full algorithm for finding a local minimum can be written as $x_{i+1} = x_{i} - \lambda_{i}\nabla F(x_{i})$, where $\lambda_{i} = \mathrm{const}$ is the size of a single step (`alpha` in the code below) and $x_{i}$ is the current point.

In [None]:
# Single iteration of an algorithm
def gradient_step(alpha: tf.Tensor, x: tf.Tensor, y: tf.Tensor) -> tuple:
    """
    Performs one gradient descent step on the Rosenbrock function
    :param alpha: step size
    :param x: current first coordinate (tf.Variable or tensor)
    :param y: current second coordinate (tf.Variable or tensor)
    :return: pair (x, y) of tensors holding the updated coordinates

    NOTE(review): `tf` is not imported in the visible import cell -
    confirm that `import tensorflow as tf` runs before this cell.
    """
    with tf.GradientTape() as tape:
        # The values returned by a previous step are plain tensors rather
        # than variables, so both coordinates are watched explicitly.
        tape.watch((x, y))

        # Optimization function
        F = (1.0 - x) ** 2.0 + 100.0 * (y - x ** 2.0) ** 2.0

    # Differentiate after leaving the tape context so the gradient
    # computation itself is not recorded.
    dF_dx, dF_dy = tape.gradient(F, [x, y])

    # Step against the gradient
    return x - alpha * dF_dx, y - alpha * dF_dy

# Gradient steps
steps = 1000

# Gradient step size
alpha = tf.constant(0.001)

# Start conditions
x = tf.Variable(2.0)
y = tf.Variable(-1.0)

# Descent trajectory, starting point first. Points are collected in a list
# and converted to an array once, instead of re-copying the whole history
# with np.vstack on every step.
trajectory = [(x.numpy(), y.numpy())]

for _ in range(steps):
    # Iterative descent
    x, y = gradient_step(alpha, x, y)
    trajectory.append((x.numpy(), y.numpy()))

# One row per visited point: column 0 is x, column 1 is y
xi_yi = np.array(trajectory)

print(f"Closest approximation of the local minimum: [{xi_yi[steps, 0]}, {xi_yi[steps, 1]}]")

Visualisation intervals and rendering density

In [None]:
# Spacing between neighbouring grid samples
density = 0.1

# Half-open sampling ranges (start, stop) for each axis
x_range = (-2.0, 2.0)
y_range = (-1.0, 3.0)

X = np.arange(*x_range, density)
Y = np.arange(*y_range, density)

**Rosenbrock function** in vector form:

In [None]:
def F(x, y):
    """
    Rosenbrock function; works element-wise on arrays
    :param x: first coordinate (scalar or array)
    :param y: second coordinate (scalar or array)
    :return: (1 - x)^2 + 100 * (y - x^2)^2
    """
    return (1.0 - x) ** 2.0 + 100.0 * (y - x ** 2.0) ** 2.0

Descent visualisation for space $(x, y) \in \{ -2 \leq x \leq 2; -1 \leq y \leq 3 \}$

In [None]:
# Coordinates of the visited points: column 0 is x, column 1 is y
path_x = xi_yi[:, 0]
path_y = xi_yi[:, 1]

# Surface first, then the descent path drawn on top of it
ax = plot_grid(F, X, Y)
ax.plot(path_x, path_y, F(path_x, path_y),
        lw=0.5, marker='*', color='black')

plt.show()