In [59]:
from torch.autograd.functional import hessian, jacobian
from scipy.linalg import hilbert
import numpy as np
import torch

In [92]:
from typing import Any

class GradStop():
    """Stopping criterion based on the size of the gradient.

    Calling an instance evaluates the Jacobian of ``f`` at ``x`` (the gradient
    when ``f`` is scalar-valued) and reports whether its norm, as computed by
    ``np.linalg.norm`` with default arguments, is below ``suf_grad``.
    """

    def __init__(self, f = None, suf_grad = 1e-6) -> None:
        """Store the objective and the tolerance.

        Args:
            f: Callable differentiated by ``torch.autograd.functional.jacobian``.
                May be left as ``None`` and assigned later through the ``f``
                attribute.
            suf_grad: Norm below which the gradient counts as small enough.
        """
        self.suf_grad = suf_grad
        self.f = f

    def __call__(self, x, verbose = False) -> Any:
        """Return whether the gradient norm at ``x`` is below the tolerance.

        Args:
            x: Point (torch tensor) at which the Jacobian of ``f`` is evaluated.
            verbose: When true, print the computed gradient norm.

        Returns:
            The result of comparing the gradient norm against ``suf_grad``.
        """
        gradient = jacobian(self.f, x).detach().numpy()
        grad_norm = np.linalg.norm(gradient)
        if verbose:
            print(f"Gradient norm {grad_norm}.")
        return grad_norm < self.suf_grad



class Solver():
    """Skeleton of an iterative line-search minimiser.

    Subclasses provide the search direction through ``get_p``. This class
    drives the iteration loop, the backtracking step-length search and the
    stopping test.
    """

    def __init__(self, f, stop_crit, max_iter = 10_000, ro = 0.5, alpha_ini = 0.99, c = 0.45, alpha_f = None, iternad_processor = None) -> None:
        """Configure the solver.

        Args:
            f: Objective; called with a torch tensor and differentiated with
                ``torch.autograd.functional.jacobian``.
            stop_crit: Callable ``stop_crit(x, verbose=...)`` with an ``f``
                attribute. If that attribute is ``None`` it is set to ``f``.
            max_iter: Upper bound on the number of iterations in ``solve``.
            ro: Factor by which the step length is multiplied on each
                backtracking step.
            alpha_ini: Step length the backtracking search starts from.
            c: Constant multiplying the directional derivative in the
                sufficient-decrease test.
            alpha_f: Optional replacement for ``get_alpha``; called as
                ``alpha_f(p, x)``.
            iternad_processor: Optional callable invoked with the current
                iterate at the start of every iteration.
        """
        self.f = f
        self.stop_crit = stop_crit
        # A criterion built without an objective shares this solver's objective.
        if self.stop_crit.f is None:
            self.stop_crit.f = f
        self.ro = ro
        self.alpha_ini = alpha_ini
        self.c = c
        self.iterands = 0
        self.max_iter = max_iter
        # Stored on the instance, so it is called without ``self``.
        if alpha_f is not None:
            self.get_alpha = alpha_f
        self.has_iterand_processor = iternad_processor is not None
        if iternad_processor is not None:
            self.iterand_processor = iternad_processor

    def solve(self, x):
        """Iterate from ``x`` until the stopping criterion or ``max_iter`` is hit.

        Args:
            x: Starting point; converted with ``tensorize``.

        Returns:
            The last iterate as a float64 tensor. A message reporting success
            or failure is printed before returning.
        """
        x = self.tensorize(x)
        self.iterands = 0
        while not self.stop_crit(x) and self.iterands < self.max_iter:
            self.iterands += 1
            if self.has_iterand_processor:
                self.iterand_processor(x)
            # The direction may come back with a different shape; align it with x.
            p = torch.reshape(self.get_p(x), x.shape)
            x = x + self.get_alpha(p, x) * p
        if self.stop_crit(x, verbose = True):
            print(f"Converged to the solution {x} after {self.iterands} steps")
        else:
            print(f"Failed to converge and ended in {x}")
        return x

    def get_p(self, x):
        """Return the search direction at ``x``; must be overridden.

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError(
            f"{type(self).__name__} must implement get_p(x)"
        )

    def get_alpha(self, p, x):
        """Backtracking line search along ``p`` starting at ``alpha_ini``.

        The step length is multiplied by ``ro`` until
        ``f(x + alpha * p) <= f(x) + c * alpha * p.T @ grad f(x)``.

        Args:
            p: Search direction, same shape as ``x``.
            x: Current iterate.

        Returns:
            The first step length that passes the test.
        """
        # f(x) and the gradient do not depend on alpha: evaluate them once.
        f_x = self.f(x)
        grad = jacobian(self.f, x)
        alpha = self.alpha_ini
        # NOTE(review): alpha has no lower bound; when p is not a descent
        # direction the loop only ends once alpha is small enough for the
        # comparison to fail.
        while self.f(x + alpha * p) > f_x + self.c * alpha * p.T @ grad:
            alpha *= self.ro
        return alpha

    def tensorize(self, x):
        """Convert ``x`` to a float64 tensor with at least one dimension.

        Scalars become one-element tensors; sequences, arrays and tensors of
        other dtypes are converted to float64.
        """
        return torch.atleast_1d(torch.as_tensor(x, dtype=torch.float64))

class HessianModifiedNewton(Solver):
    """Newton direction computed from a Hessian shifted to admit a Cholesky factor.

    The Hessian is replaced by ``H + tau * I``, with ``tau`` increased until
    ``np.linalg.cholesky`` succeeds, before the Newton system is solved.
    """

    def get_p(self, x):
        """Solve ``B p = -grad f(x)`` where ``B`` is the modified Hessian.

        Args:
            x: Current iterate (torch tensor).

        Returns:
            The search direction as a ``torch.DoubleTensor``.

        Raises:
            np.linalg.LinAlgError: If no usable shift of the Hessian is found.
        """
        # Work in NumPy explicitly instead of mixing tensors and arrays.
        hes = hessian(self.f, x).detach().numpy()
        modified_hes = self.make_positive_definite(hes)
        down_grad = -jacobian(self.f, x).detach().numpy()
        p = np.linalg.solve(modified_hes, down_grad)
        return torch.DoubleTensor(p)

    def make_positive_definite(self, H, beta = 1e-3, max_iter = 1e4):
        """Add a multiple of the identity to ``H`` until Cholesky succeeds.

        Args:
            H: Square matrix (NumPy array or anything ``np.asarray`` accepts).
            beta: Smallest non-zero shift; also the margin added when the
                smallest diagonal entry is not positive.
            max_iter: Maximum number of factorisation attempts.

        Returns:
            ``L @ L.T`` where ``L`` is the Cholesky factor of ``H + tau * I``
            for the first ``tau`` that works.

        Raises:
            np.linalg.LinAlgError: If every attempt fails.
        """
        H = np.asarray(H)
        # Initial shift: none if the diagonal is already positive, otherwise
        # enough to lift the smallest diagonal entry to beta.
        min_diag = np.min(np.diag(H))
        tau = 0 if min_diag > 0 else beta - min_diag
        I = np.eye(*H.shape)
        attempts = 0
        while attempts < max_iter:
            try:
                L = np.linalg.cholesky(H + tau * I)
            except np.linalg.LinAlgError:
                # Factorisation failed: double the shift (at least beta).
                tau = max(2 * tau, beta)
            else:
                return L @ L.T
            attempts += 1
        # Previously this fell through and returned None, which only failed
        # later inside np.linalg.solve with an unrelated message.
        raise np.linalg.LinAlgError(
            f"no Cholesky factorization of H + tau * I found in {attempts} attempts"
        )

class Newton(Solver):
    """Plain Newton method: the direction solves ``H p = -grad f`` with the raw Hessian."""

    def get_p(self, x):
        """Return the Newton direction at ``x`` as a ``torch.DoubleTensor``.

        The Hessian is used unmodified, so ``np.linalg.solve`` raises when it
        is singular.
        """
        curvature = hessian(self.f, x)
        negative_gradient = -jacobian(self.f, x)
        return torch.DoubleTensor(np.linalg.solve(curvature, negative_gradient))



## Vanilla Newton

### Function 1

In [33]:
def problem_1(x):
    """Rosenbrock function ``100 (x1 - x0^2)^2 + (1 - x0)^2`` of a 2-component point."""
    first, second = x[0], x[1]
    valley = second - first**2
    offset = 1 - first
    return 100 * valley**2 + offset**2

# Run the unmodified Newton solver on problem_1 from three starting points.
solver = Newton(problem_1, GradStop(problem_1))
for banner, start in (
    ("\nStarting at (1.2, 1.2)", [1.2,1.2]),
    ("\nStarting at (-1.2, 1)", [-1.2, 1]),
    ("\nStarting at (0.2, 0.8)", [0.2, 0.8]),
):
    print(banner)
    x = solver.solve(start)


Starting at (1.2, 1.2)
Gradient norm 5.2378828278824896e-08.
Converged to the solution tensor([1.0000, 1.0000], dtype=torch.float64) after 9 steps

Starting at (-1.2, 1)
Gradient norm 7.416159567795207e-08.
Converged to the solution tensor([1.0000, 1.0000], dtype=torch.float64) after 32 steps

Starting at (0.2, 0.8)
Gradient norm 2.671230645372248.
Failed to converge and ended in tensor([0.1948, 0.0455], dtype=torch.float64)


### Function 2

In [40]:
def problem_2(x):
    """Evaluate ``150 (x0 x1)^2 + (0.5 x0 + 2 x1 - 2)^2`` at a 2-component point."""
    first, second = x[0], x[1]
    product_term = 150 * (first * second)**2
    linear_term = (0.5 * first + 2 * second - 2)**2
    return product_term + linear_term

# Run the unmodified Newton solver on problem_2 from three starting points.
solver = Newton(problem_2, GradStop(problem_2))
for banner, start in (
    ("\nStarting at (-0.2, 1.2)", [-0.2,1.2]),
    ("\nStarting at (3.8, 0.1)", [3.8, 0.1]),
    ("\nStarting at (1.9, 0.6)", [1.9, 0.6]),
):
    print(banner)
    x = solver.solve(start)


Starting at (-0.2, 1.2)
Gradient norm 25.90677839441986.
Failed to converge and ended in tensor([-0.1576,  0.7289], dtype=torch.float64)

Starting at (3.8, 0.1)
Gradient norm 34.954029214106214.
Failed to converge and ended in tensor([1.4726, 0.0607], dtype=torch.float64)

Starting at (1.9, 0.6)
Gradient norm 0.006535527940709905.
Failed to converge and ended in tensor([0.4366, 0.1094], dtype=torch.float64)


## Newton Method with Hessian Modification

### Function 1

In [31]:
def problem_1(x):
    """Rosenbrock function ``100 (x1 - x0^2)^2 + (1 - x0)^2`` of a 2-component point."""
    first, second = x[0], x[1]
    valley = second - first**2
    offset = 1 - first
    return 100 * valley**2 + offset**2

# Run the Hessian-modified Newton solver on problem_1 from three starting points.
solver = HessianModifiedNewton(problem_1, GradStop(problem_1))
for banner, start in (
    ("\nStarting at (1.2, 1.2)", [1.2,1.2]),
    ("\nStarting at (-1.2, 1)", [-1.2, 1]),
    ("\nStarting at (0.2, 0.8)", [0.2, 0.8]),
):
    print(banner)
    x = solver.solve(start)


Starting at (1.2, 1.2)
Gradient norm 5.2378828278824896e-08.
Converged to the solution tensor([1.0000, 1.0000], dtype=torch.float64) after 9 steps

Starting at (-1.2, 1)
Gradient norm 7.416159567795207e-08.
Converged to the solution tensor([1.0000, 1.0000], dtype=torch.float64) after 32 steps

Starting at (0.2, 0.8)
Gradient norm 1.1491707515913118e-08.
Converged to the solution tensor([1.0000, 1.0000], dtype=torch.float64) after 39 steps


### Function 2

In [35]:
def problem_2(x):
    """Evaluate ``150 (x0 x1)^2 + (0.5 x0 + 2 x1 - 2)^2`` at a 2-component point."""
    first, second = x[0], x[1]
    product_term = 150 * (first * second)**2
    linear_term = (0.5 * first + 2 * second - 2)**2
    return product_term + linear_term

# Run the Hessian-modified Newton solver on problem_2 from three starting points.
solver = HessianModifiedNewton(problem_2, GradStop(problem_2))
for banner, start in (
    ("\nStarting at (-0.2, 1.2)", [-0.2,1.2]),
    ("\nStarting at (3.8, 0.1)", [3.8, 0.1]),
    ("\nStarting at (1.9, 0.6)", [1.9, 0.6]),
):
    print(banner)
    x = solver.solve(start)


Starting at (-0.2, 1.2)
Gradient norm 8.246042296740478e-08.
Converged to the solution tensor([2.6682e-10, 1.0000e+00], dtype=torch.float64) after 7 steps

Starting at (3.8, 0.1)
Gradient norm 8.949292743208082e-07.
Converged to the solution tensor([4.0000e+00, 1.8204e-10], dtype=torch.float64) after 13 steps

Starting at (1.9, 0.6)
Gradient norm 1.354512588685944e-08.
Converged to the solution tensor([ 4.0000e+00, -2.9169e-12], dtype=torch.float64) after 21 steps
