In [1]:
import numpy as np
from pprint import pprint
from torch.autograd.functional import jacobian, hessian
import torch, time

### Vanilla Newton

In [8]:
class Solver():
    """Base class for iterative minimizers of the form ``x <- x + alpha * p``.

    Subclasses supply the search direction by overriding :meth:`get_p` and
    signal convergence by setting ``self.done = True``. The step length is
    chosen by the backtracking line search in :meth:`get_alpha`.
    """

    def __init__(self, f, max_iter = 10_000, ro = 0.5, alpha_ini = 0.99, c = 0.45, alpha_f = None, iternad_processor = None) -> None:
        """Store the objective and the line-search settings.

        Args:
            f: Objective; called with a tensor and differentiated with
                ``torch.autograd.functional.jacobian``.
            max_iter: Upper bound on the number of iterations in ``solve``.
            ro: Factor by which the step length is multiplied on each
                backtracking step.
            alpha_ini: Initial trial step length.
            c: Sufficient-decrease constant of the line-search condition.
            alpha_f: Accepted for interface compatibility; not used here.
            iternad_processor: Accepted for interface compatibility; not
                used here.
        """
        self.f = f
        self.ro = ro
        self.alpha_ini = alpha_ini
        self.c = c
        self.iterands = 0
        self.max_iter = max_iter
        self.done = False

    def solve(self, x):
        """Iterate from ``x`` until ``self.done`` is set or ``max_iter`` is hit.

        Args:
            x: Starting point (number, sequence, array or tensor).

        Returns:
            The final iterate as a float64 tensor.
        """
        x = self.tensorize(x)
        self.iterands = 0
        # Reset the convergence flag so the same solver can be reused;
        # otherwise a second call would return its input untouched.
        self.done = False
        while not self.done and self.iterands < self.max_iter:
            self.iterands += 1
            p = torch.reshape(self.get_p(x), x.shape)
            x = x + self.get_alpha(p, x) * p
        return x

    def get_p(self, x):
        """Return the search direction at ``x``; must be overridden.

        Raises:
            NotImplementedError: always, in this base class.
        """
        raise NotImplementedError("Subclasses of Solver must implement get_p")

    def get_alpha(self, p, x):
        """Backtracking line search along ``p`` starting from ``x``.

        Starts at ``alpha_ini`` and multiplies the step by ``ro`` while
        ``f(x + alpha * p) > f(x) + c * alpha * <p, grad f(x)>``.

        Returns:
            The first step length for which the condition above is false.
        """
        alpha = self.alpha_ini
        # f(x) and the directional derivative do not depend on alpha, so
        # evaluate them once instead of on every backtracking step.
        f_x = self.f(x)
        slope = torch.dot(p.reshape(-1), jacobian(self.f, x).reshape(-1))
        while self.f(x + alpha * p) > f_x + self.c * alpha * slope:
            alpha *= self.ro
        return alpha

    def tensorize(self, x):
        """Convert ``x`` to a float64 tensor with at least one dimension.

        Scalars (Python or 0-d) become tensors of shape ``(1,)``; sequences,
        arrays and tensors of any floating/integer dtype are converted to
        float64.
        """
        t = torch.as_tensor(x, dtype=torch.float64)
        return t.reshape(1) if t.ndim == 0 else t

class Newton(Solver):
    """Solver whose search direction is the Newton step."""

    def get_p(self, x):
        """Return the solution ``p`` of ``hessian(f)(x) @ p = -grad f(x)``.

        Sets ``self.done`` when the gradient norm at ``x`` is below 1e-6;
        the direction is still computed and returned in that case.
        """
        curvature = hessian(self.f, x)
        neg_gradient = -jacobian(self.f, x)
        gradient_norm = np.linalg.norm(neg_gradient.detach().numpy())
        if gradient_norm < 1e-6:
            self.done = True
        newton_step = np.linalg.solve(curvature, neg_gradient)
        return torch.DoubleTensor(newton_step)
    
class GradStop():
    """Callable stopping test: true when the gradient norm of ``f`` is small."""

    def __init__(self, f = None, suf_grad = 1e-6) -> None:
        """Remember the objective ``f`` and the gradient-norm threshold."""
        self.suf_grad = suf_grad
        self.f = f

    def __call__(self, x, verbose = False):
        """Return whether the norm of ``jacobian(f, x)`` is below ``suf_grad``.

        When ``verbose`` is true the gradient norm is printed as well.
        """
        gradient = jacobian(self.f, x)
        grad_norm = np.linalg.norm(gradient.detach().numpy())
        if verbose:
            print(f"Gradient norm {grad_norm}.")
        return grad_norm < self.suf_grad

### Quasi-Newton

In [23]:
def backtracking_line_search(func, xk, pk, alpha=1, rho=0.5, c=1e-4, min_alpha=1e-8):
    """Backtracking line search with a full-step fallback.

    Shrinks ``alpha`` by the factor ``rho`` while
    ``func(xk + alpha * pk) > func(xk) + c * alpha * <grad func(xk), pk>``.
    If the step length drops below ``min_alpha`` the search is abandoned
    and 1 is returned instead.

    Args:
        func: Objective; called with a tensor, differentiated with
            ``torch.autograd.functional.jacobian``.
        xk: Current point (converted to a float64 tensor).
        pk: Search direction (converted to a float64 tensor).
        alpha: Initial trial step length.
        rho: Shrink factor applied on every rejected trial; expected to lie
            in (0, 1).
        c: Sufficient-decrease constant.
        min_alpha: Step lengths below this value are rejected in favour
            of the fallback step 1.

    Returns:
        The accepted step length, or 1 when no step of at least
        ``min_alpha`` satisfied the condition.
    """
    xk = torch.as_tensor(xk, dtype=torch.float64)
    pk = torch.as_tensor(pk, dtype=torch.float64)
    grad_fk = jacobian(func, xk)
    dot_prod = torch.dot(grad_fk, pk)
    # func(xk) does not depend on alpha: evaluate it once.
    f_xk = func(xk)
    # Stop shrinking once alpha is below min_alpha: any such step would be
    # replaced by the fallback anyway, so further evaluations are wasted.
    while alpha >= min_alpha and func(xk + alpha * pk) > f_xk + c * alpha * dot_prod:
        alpha *= rho

    if alpha < min_alpha:
        alpha = 1
    return alpha

In [29]:
# Quasi-Newton SR1
def _sr1_update(M, u, v, skip_tol):
    """Apply one symmetric-rank-one correction to ``M``.

    With ``r = u - M @ v`` the corrected matrix is ``M + r r^T / (r^T v)``.
    The correction is skipped (``M`` returned unchanged) when
    ``|r^T v| <= skip_tol * ||r|| * ||v||``, which avoids dividing by a
    vanishing denominator.

    Returns:
        Tuple ``(updated_matrix, r^T v)``.
    """
    r = u - M @ v
    denom = torch.dot(r, v)
    if torch.abs(denom) <= skip_tol * torch.linalg.norm(r) * torch.linalg.norm(v):
        return M, denom
    return M + torch.outer(r, r) / denom, denom


def sr1_method(f, x0, B0, H0, max_iter=10000, tol=1e-6, line_search=None, skip_tol=1e-8):
    """Quasi-Newton iteration with SR1 updates of ``B`` and ``H``.

    Each iteration solves ``B_k p_k = -grad f(x_k)``, picks a step length
    with ``line_search``, and applies the SR1 formula to both ``B_k``
    (with ``y_k - B_k s_k``) and ``H_k`` (with ``s_k - H_k y_k``).

    Args:
        f: Objective; differentiated with
            ``torch.autograd.functional.jacobian``.
        x0: Starting point.
        B0: Initial matrix for the ``B`` sequence (used to compute steps).
        H0: Initial matrix for the ``H`` sequence (tracked and reported).
        max_iter: Maximum number of iterations.
        tol: Stop once the gradient norm at the new point is below this.
        line_search: Callable ``(f, xk, pk) -> alpha``. Defaults to
            ``backtracking_line_search``.
        skip_tol: Relative threshold under which an SR1 correction is
            skipped because its denominator is too small.

    Returns:
        A list with one dict per iteration holding the new point, ``s_k``,
        ``y_k``, the updated ``B`` and ``H``, the ``H``-update denominator
        and the gradient norm at the new point.
    """
    if line_search is None:
        line_search = backtracking_line_search
    xk = torch.as_tensor(x0, dtype=torch.float64)
    Bk = torch.as_tensor(B0, dtype=torch.float64)
    Hk = torch.as_tensor(H0, dtype=torch.float64)
    results = []
    for k in range(max_iter):
        grad_fk = jacobian(f, xk)
        pk = torch.from_numpy(
            -np.linalg.solve(Bk.detach().numpy(), grad_fk.detach().numpy())
        )

        alpha_k = line_search(f, xk, pk)

        xk1 = xk + alpha_k * pk
        sk = xk1 - xk
        grad_fk1 = jacobian(f, xk1)
        yk = grad_fk1 - grad_fk

        Bk1, _ = _sr1_update(Bk, yk, sk, skip_tol)
        Hk1, sy_T_y = _sr1_update(Hk, sk, yk, skip_tol)

        grad_norm = np.linalg.norm(grad_fk1.detach().numpy())
        results.append({
            "iteration": k,
            "x_k+1": xk1,
            "s_k": sk,
            "y_k": yk,
            "B_k+1": Bk1,
            "H_k+1": Hk1,
            "(sk - Hkyk)^Tyk": sy_T_y,
            "|grad_f_k+1|": grad_norm
        })

        if grad_norm < tol:
            break

        # Carry H forward together with x and B; otherwise every H update
        # would start again from H0.
        xk, Bk, Hk = xk1, Bk1, Hk1

    return results


### Race setup

In [58]:
def race(f, starting_point, verbose = True):
    """Time one SR1 run and one Newton run on ``f`` from the same start.

    Args:
        f: Objective passed to ``sr1_method`` and to ``Newton``.
        starting_point: Initial point; converted to a float64 tensor.
        verbose: When true, print the two elapsed times.

    Returns:
        Tuple ``(sr_time, nw_time)`` with the elapsed seconds of the SR1
        run and of the Newton run.
    """
    starting_point = torch.as_tensor(starting_point, dtype=torch.float64)
    # Built once, outside the timed region, and used for both B0 and H0.
    identity = np.eye(len(starting_point))

    # SR1
    # perf_counter is the clock intended for measuring short durations;
    # time.time() follows the wall clock and can be coarse or adjusted.
    t1 = time.perf_counter()
    sr1_method(f, starting_point, identity, identity)
    t2 = time.perf_counter()
    sr_time = t2 - t1
    if verbose: print(f"SR1 needed {sr_time} seconds")

    # Newton
    solver = Newton(f)
    t1 = time.perf_counter()
    solver.solve(starting_point)
    t2 = time.perf_counter()
    nw_time = t2 - t1
    if verbose: print(f"Newton needed {nw_time} seconds")
    return sr_time, nw_time

In [60]:
def robust_race(f, starting_point, n = 20):
    """Run ``race`` ``n`` times silently and print the mean time per solver."""
    timings = [race(f, starting_point, verbose=False) for _ in range(n)]
    sr_total = sum(sr_time for sr_time, _ in timings)
    nw_total = sum(nw_time for _, nw_time in timings)
    print(f"SR1 needed {sr_total/n} seconds")
    print(f"Newton needed {nw_total/n} seconds")
    
    

In [61]:
def rosenbrock_f(x):
    """Two-variable Rosenbrock function evaluated on ``x[0]`` and ``x[1]``."""
    first, second = x[0], x[1]
    valley = second - first**2
    return 100 * valley**2 + (1 - first)**2

In [66]:
# Average the SR1 and Newton timings over 50 races on the Rosenbrock function, starting from (1.2, 1.0).
robust_race(rosenbrock_f, [1.2,1.0], n=50)

SR1 needed 0.03046987533569336 seconds
Newton needed 0.0198876428604126 seconds


In [78]:
def noisy_polynomial_f(x):
    """Quadratic in ``x[0]`` and ``x[1]`` whose coefficients switch at random.

    One integer is drawn from ``np.random.randint(0, 5)`` per call; when it
    is 0 the flatter form is returned, otherwise the steeper one.
    """
    draw = np.random.randint(0, 5)
    if draw == 0:
        return x[0]**2 + 1e-6 * x[1]**2
    return 12 * x[0]**2 + x[1]**2

In [79]:
# Average the SR1 and Newton timings on the randomly switching quadratic, starting from (1.2, 1.2).
robust_race(noisy_polynomial_f, [1.2,1.2])

SR1 needed 0.00579155683517456 seconds
Newton needed 0.024589478969573975 seconds


In [84]:
# Same comparison on the randomly switching quadratic, now starting from (12, 2.5).
robust_race(noisy_polynomial_f, [12,2.5])

SR1 needed 0.006417906284332276 seconds
Newton needed 0.03006303310394287 seconds


In [None]:
def noisy_polynomial_f(x):
    """Quadratic in ``x[0]`` and ``x[1]`` whose coefficients switch at random.

    One integer is drawn from ``np.random.randint(0, 5)`` per call; when it
    is 0 the flatter form is returned, otherwise the steeper one.
    """
    # NOTE(review): this repeats the definition of the same name from an
    # earlier cell and rebinds it when run.
    draw = np.random.randint(0, 5)
    if draw == 0:
        return x[0]**2 + 1e-6 * x[1]**2
    return 12 * x[0]**2 + x[1]**2