# HW2 Problem 1: Newton's Method

We want to minimize the function $f: \mathbb{R}^3 \rightarrow \mathbb{R}$

$$f(x_1,x_2,x_3) = x_3\log(e^{\frac{x_1}{x_3}}+e^{\frac{x_2}{x_3}})+(x_3 - 2)^2+e^{\frac{1}{x_1+x_2}}, \qquad \textbf{dom } f=\{\mathbf{x}\in\mathbb{R}^3 : x_1+x_2>0,\,x_3>0\}$$

For Newton's method, we need the gradient $\nabla f$ and the Hessian $\nabla^2 f$. The Hessian is

$$
\nabla^2 f(x_1,x_2,x_3)
=
\frac{w}{x_3}
\begin{pmatrix}
1 & -1 & -\Delta \\
-1 & 1 & \Delta \\
-\Delta & \Delta & \Delta^2
\end{pmatrix}
+
t
\begin{pmatrix}
1 & 1 & 0 \\
1 & 1 & 0 \\
0 & 0 & 0
\end{pmatrix}
+
\begin{pmatrix}
0 & 0 & 0 \\
0 & 0 & 0 \\
0 & 0 & 2
\end{pmatrix}
$$

with the following definitions

$$
\begin{aligned}
s &:= x_1 + x_2, \\
\Delta &:= \frac{x_1 - x_2}{x_3}, \\
\sigma(\Delta) &:= \frac{1}{1+e^{-\Delta}}, \\
w &:= \sigma(\Delta)\big(1-\sigma(\Delta)\big), \\
E &:= e^{1/s}, \\
t &:= E\,\frac{2s+1}{s^4}.
\end{aligned}
$$

In [1]:
# Import packages
import numpy as np
from numpy import log, exp
from scipy.special import expit

In [2]:
# Define the function
def f(x:np.ndarray) -> float:
    """Evaluate the objective at ``x = (x1, x2, x3)``.

    f(x) = x3 * log(exp(x1/x3) + exp(x2/x3)) + (x3 - 2)^2 + exp(1 / (x1 + x2))

    The log-sum-exp term is evaluated with ``np.logaddexp`` so that large
    ratios ``x1/x3`` or ``x2/x3`` (e.g. small ``x3``) do not overflow to
    ``inf`` the way ``log(exp(a) + exp(b))`` does.

    Args:
        x: The three components ``x1, x2, x3``.

    Returns:
        The value of the objective. No domain check is performed here.
    """
    x1, x2, x3 = x
    # log(exp(a) + exp(b)) computed without forming exp(a) or exp(b) directly.
    log_sum_exp = np.logaddexp(x1 / x3, x2 / x3)
    return x3 * log_sum_exp + (x3 - 2) ** 2 + np.exp(1 / (x1 + x2))

# Define the gradient of the function
def grad_f(x:np.ndarray) -> np.ndarray:
    """Return the gradient of the objective at ``x = (x1, x2, x3)``.

    With ``delta = (x1 - x2) / x3``, ``s = x1 + x2`` and ``p = expit(delta)``:

        df/dx1 = p - exp(1/s) / s^2
        df/dx2 = expit(-delta) - exp(1/s) / s^2
        df/dx3 = log(exp(x1/x3) + exp(x2/x3)) + 2 (x3 - 2)
                 - (x2 + (x1 - x2) p) / x3

    The log-sum-exp in the third component is evaluated with
    ``np.logaddexp`` so it stays finite when ``x1/x3`` or ``x2/x3`` is large.

    Args:
        x: The three components ``x1, x2, x3``.

    Returns:
        A length-3 array holding the partial derivatives in the order
        ``(x1, x2, x3)``. No domain check is performed here.
    """
    x1, x2, x3 = x
    s = x1 + x2
    delta = (x1 - x2) / x3
    p = expit(delta)

    # Magnitude of d/dx1 (= d/dx2) of exp(1/s); it enters with a minus sign.
    exp_term = exp(1 / s) / s**2

    # Overflow-safe log(exp(x1/x3) + exp(x2/x3)).
    log_sum_exp = np.logaddexp(x1 / x3, x2 / x3)

    return np.array([
        p - exp_term,
        expit(-delta) - exp_term,
        log_sum_exp - (2 - x3) * 2 - (x2 + (x1 - x2) * p) / x3,
    ])

# Define the Hessian of the function
def hess_f(x: np.ndarray) -> np.ndarray:
    """Return the 3x3 Hessian of the objective at ``x = (x1, x2, x3)``.

    The result is assembled as ``(w / x3) * A + t * B + diag(0, 0, 2)``:
    ``A`` comes from the log-sum-exp term, ``B`` from ``exp(1 / (x1 + x2))``
    and the diagonal entry from ``(x3 - 2)^2``.

    Args:
        x: The three components ``x1, x2, x3``.

    Returns:
        The Hessian as a ``(3, 3)`` float array.

    Raises:
        ValueError: If ``x3 <= 0`` or ``x1 + x2 <= 0``.
    """
    x1, x2, x3 = x
    total = x1 + x2
    if total <= 0 or x3 <= 0:
        raise ValueError("Outside domain: need x3>0 and x1+x2>0.")

    ratio = (x1 - x2) / x3
    sig = expit(ratio)
    weight = sig * (1.0 - sig)  # sigma * (1 - sigma)

    # Second derivative of exp(1/s) with respect to s, at s = x1 + x2.
    exp_curvature = np.exp(1.0 / total) * (2.0 * total + 1.0) / (total**4)

    # Pattern contributed by the log-sum-exp term.
    lse_pattern = np.array(
        [
            [1.0, -1.0, -ratio],
            [-1.0, 1.0, ratio],
            [-ratio, ratio, ratio**2],
        ],
        dtype=float,
    )

    # exp(1/(x1 + x2)) depends on x1 and x2 only through their sum, so its
    # Hessian pattern is the outer product of (1, 1, 0) with itself.
    ones_xy = np.array([1.0, 1.0, 0.0])
    exp_pattern = np.outer(ones_xy, ones_xy)

    hessian = (weight / x3) * lse_pattern + exp_curvature * exp_pattern
    hessian[2, 2] += 2.0  # from (x3 - 2)^2
    return hessian

In [None]:
# Parameters
alpha = 0.4  # sufficient-decrease constant in the backtracking acceptance test
beta = 0.5   # factor the step length is multiplied by on each backtracking step
eps = 1e-5   # the Newton loop stops once the gradient norm is no larger than this
x0 = np.array([3.0, 4.0, 5.0])  # starting point (x1, x2, x3) of the iteration

def in_domain(x: np.ndarray) -> bool:
    """Tell whether ``x`` satisfies ``x3 > 0`` and ``x1 + x2 > 0``."""
    x1, x2, x3 = x[0], x[1], x[2]
    return (x3 > 0) and ((x1 + x2) > 0)

def newton_direction(x: np.ndarray, damping: float = 1e-8) -> np.ndarray:
    """Return the solution ``p`` of ``(H + damping * I) p = -g`` at ``x``.

    ``g`` and ``H`` are the gradient and Hessian from ``grad_f`` and
    ``hess_f``. The small multiple of the identity is added so the linear
    system stays solvable if ``H`` is numerically close to singular.

    Args:
        x: Current point ``(x1, x2, x3)``.
        damping: Multiple of the 3x3 identity added to the Hessian.

    Returns:
        The (slightly damped) Newton direction.
    """
    gradient = grad_f(x).reshape(-1)
    regularised_hessian = hess_f(x) + damping * np.eye(3)
    return np.linalg.solve(regularised_hessian, -gradient)

def backtracking_newton(x: np.ndarray, p: np.ndarray) -> float:
    """Return a backtracking line-search step length along ``p`` from ``x``.

    Starting from ``t = 1``, the step is multiplied by the module-level
    ``beta`` until ``x + t * p`` lies in the domain and the sufficient-decrease
    test ``f(x + t * p) <= f(x) + alpha * t * (g . p)`` holds, where ``g`` is
    ``grad_f(x)`` and ``alpha`` is the module-level constant.

    If ``p`` is not a descent direction (``g . p >= 0``), the search falls
    back to ``-g``. The fallback is written into ``p`` IN PLACE, so a caller
    that afterwards forms ``x + t * p`` steps along the direction the returned
    ``t`` was computed for. (Rebinding only the local name would pair ``t``
    with the rejected direction.)

    Args:
        x: Current point ``(x1, x2, x3)``.
        p: Search direction as a float array; overwritten with ``-grad_f(x)``
            when it is not a descent direction.

    Returns:
        The accepted step length ``t``.

    Raises:
        RuntimeError: If the step length has shrunk to exactly 0 and the
            acceptance test still fails; the loop could otherwise never end.
    """
    fx = f(x)
    g = grad_f(x).reshape(-1)
    slope = float(np.dot(g, p))

    # Not a descent direction (numerical issues): fall back to steepest
    # descent, updating the caller's array so direction and step stay paired.
    if slope >= 0:
        p[...] = -g
        slope = float(np.dot(g, p))

    t = 1.0
    while True:
        x_new = x + t * p
        if in_domain(x_new) and f(x_new) <= fx + alpha * t * slope:
            return t

        # Once t has underflowed to 0, x_new equals x on every further pass,
        # so a failed test here can never succeed later.
        if t == 0.0:
            raise RuntimeError(
                "Backtracking line search failed: step length underflowed to 0."
            )

        t *= beta

# Damped Newton iteration: take line-searched Newton steps from x0 until the
# gradient norm is no larger than eps.
max_iter = 1000  # safety cap so a stalled iteration cannot loop forever

iter = 0  # NOTE: shadows the builtin iter(); name kept because it is module-level state
g = grad_f(x0).reshape(-1)
gnorm = np.linalg.norm(g)

while gnorm > eps:
    if iter >= max_iter:
        raise RuntimeError(
            f"Newton's method did not converge within {max_iter} iterations "
            f"(gradient norm {gnorm:.3e})."
        )

    p = newton_direction(x0)          # Newton direction
    t = backtracking_newton(x0, p)    # damped step length
    x0 = x0 + t * p                   # update

    g = grad_f(x0).reshape(-1)
    gnorm = np.linalg.norm(g)
    iter += 1

print("Optimal solution:", x0)
print("Optimal function value:", f(x0))
print("Number of iterations taken to converge:", iter)

Optimal solution: [0.92620836 0.92620837 1.65342641]
Optimal function value: 3.9081137862618798
Number of iterations taken to converge: 5
