In [2]:
from matplotlib import pyplot as plt
import matplotlib.lines as mlines
import numpy as np
plt.rcParams["figure.figsize"] = (20,10)

In [3]:
class Poly:
    """"
        c_0 + c_1 x + c_2 x^2 + .. + c_deg x^deg
    """
    def __init__(self, *, deg=2, coeffs=None):
        if coeffs is not None:
            self.coeffs = np.array(coeffs)
        else:
            self.coeffs = np.ones(deg+1)

    def __call__(self, x):
        return sum(c_i * x**i for (i, c_i) in enumerate(self.coeffs))

    def grad(self, x):
        return np.array([x**i for (i, c_ip1) in enumerate(self.coeffs[:])]).T[0]

    def hessian(self, x):
        return np.diag(list(x**i for (i, c_ip1) in enumerate(self.coeffs[:])))

In [4]:
def regression(x, y, method, **config):
    if config == {}:
        config = {"lr0": 0.5, "d": 0.005, "epoch": 1000}
    f = lambda x_poly: y - x_poly(x.T[0])
    jacobi = lambda x_poly: np.array([- x_poly.grad(x[i]) for i in range(len(x))])
    bs = method(f, np.zeros(len(x)), **config, jacobi=jacobi)
    print('hm')
    print(f'came close by {f(Poly(coeffs=bs[-1]))}')
    return bs[-1]

def gauss_newton(f, x, *, lr, epoch, jacobi):
    points = np.zeros((epoch, len(x)))
    x_poly = Poly(coeffs=x)
    points[0] = x_poly.coeffs

    for i in range(epoch):
        j = jacobi(x_poly)
        g = np.matmul(j.T, f(x_poly).reshape(-1,1))
        h = np.matmul(j.T,j)
        p = np.matmul(np.linalg.inv(h), g).T[0]
        x_poly.coeffs -= lr * p
        points[i] = x_poly.coeffs
    return points

regression(np.array([[-2], [3], [5], [4]]), np.array([5, 10, 26, 17]), gauss_newton, lr=1, epoch=1)

hm
came close by [-2.47268872e-12 -2.40163445e-12 -7.94386779e-12 -4.60431693e-12]


array([ 1.00000000e+00, -4.40536496e-13,  1.00000000e+00,  2.84217094e-14])

In [5]:
def dogleg_method_step(grad_k, hessian_k, trust_radius):
    hessian_k_inv = np.linalg.inv(hessian_k)
    dx_newton = -np.matmul(hessian_k_inv, grad_k)
    dx_newton_norm = np.linalg.norm(dx_newton)

    if dx_newton_norm <= trust_radius:
        return dx_newton

    dx_steepest = - np.dot(grad_k, grad_k) / np.dot(grad_k, np.dot(hessian_k,grad_k)) * grad_k
    dx_steepest_norm = np.linalg.norm(dx_steepest)

    if dx_steepest_norm >= trust_radius:
        return trust_radius * dx_steepest / dx_steepest_norm

    diff = dx_newton - dx_steepest
    dx_steepest_x_diff = np.matmul(dx_steepest.T, diff)
    discriminant = dx_steepest_x_diff ** 2 - np.linalg.norm(diff) ** 2 * \
                   (np.linalg.norm(dx_steepest) ** 2 - trust_radius ** 2)
    tau = (-dx_steepest_x_diff + np.sqrt(discriminant)) / np.linalg.norm(diff) ** 2
    return dx_steepest + tau * (dx_newton - dx_steepest)

def trust_region_method(func, grad, hessian, x, tr0=1, tr_limit=2 ** 5, epoch=10, eta=0.1):
    x_poly = Poly(coeffs=x)
    points = np.zeros((epoch, len(x)))
    points[0] = x_poly.coeffs
    trust_radius = tr0
    for i in range(1, epoch):
        grad_k = grad(x_poly)
        hessian_k = hessian(x_poly)
        pk = dogleg_method_step(grad_k, hessian_k, trust_radius)

        moved = Poly(coeffs=x_poly.coeffs + pk)

        # Actual reduction.
        act_red = sum(func(x_poly)**2) - sum(func(moved)**2)

        # Predicted reduction.
        # pred_red = -(np.dot(grad_k, pk) + 0.5 * np.dot(pk, np.dot(hessian_k , pk)))
        pred_red = -(np.matmul(grad_k.T, pk) + 0.5 * np.matmul(pk.T, np.dot(hessian_k, pk)))
        # print(f'{pred_red=}\n{act_red=}')
        # print(f'{trust_radius = }')
        # Rho.
        if pred_red == 0.0:
            rhok = 1e99
        else:
            rhok = act_red / pred_red

        # Calculate the Euclidean norm of pk.
        norm_pk = np.linalg.norm(pk)

        # Rho is close to zero or negative, therefore the trust region is shrunk.
        if rhok < 0.25:
            trust_radius = 0.25 * trust_radius
        else:
            # Rho is close to one and pk has reached the boundary of the trust region, therefore the trust region is expanded.
            if rhok > 0.75 and norm_pk == trust_radius:
                trust_radius = min(2.0 * trust_radius, tr_limit)
            else:
                trust_radius = trust_radius

        # Choose the position for the next iteration.
        if rhok > eta:
            x_poly = moved
        else:
            x_poly = x_poly
        points[i] = x_poly.coeffs
    return points

def regression_pdl(x, y, method, **config):
    if config == {}:
        config = {"lr0": 0.5, "d": 0.005, "epoch": 1000}
    f = lambda x_poly: (y - x_poly(x.T[0]))
    jacobi = lambda x_poly: np.array([- x_poly.grad(x[i]) for i in range(len(x))])
    hessian = lambda x_poly: np.matmul(jacobi(x_poly).T, jacobi(x_poly))
    grad = lambda x_poly: 2*np.matmul(jacobi(x_poly).T, f(x_poly))
    bs = method(f, grad, hessian, np.zeros(len(x)), **config)
    print('hm')
    print(f'came close by {f(Poly(coeffs=bs[-1]))}')
    return bs[-1]

def test_pdl(coeffs, points, **config):
    coeffs = np.array(coeffs)
    points = np.array(points)
    test_poly = Poly(coeffs=coeffs)
    res = regression_pdl(np.array(points.reshape(-1,1)),test_poly(points),trust_region_method, **config)
    print(f'result for {coeffs} is\n{res}')
    return res


test_pdl([1, 0, 1], [1, 0, -1], epoch=40, tr0=1)
test_pdl([1, 0, 1], [1, 0, -1, 2, 3], epoch=40, tr0=1, eta=0.05)
test_pdl([1, 0, 1, 0, 1, 2], [1, 0, -1, 2, 3, -4], epoch=100, tr0=1, eta=0.1)

hm
came close by [9.75554042e-07 4.78175113e-07 9.75554042e-07]
result for [1 0 1] is
[0.99999952 0.         0.9999995 ]
hm
came close by [-8.81133939e-05 -5.44572193e-05 -1.15601494e-04 -1.22060547e-04
  2.02436770e-05]
result for [1 0 1] is
[ 1.00005446e+00 -4.82854548e-06  1.00005082e+00 -8.91550443e-06
 -3.41803695e-06]
hm
came close by [0.03387678 0.02097845 0.04519732 0.06230332 0.13832769 0.05692319]
result for [1 0 1 0 1 2] is
[9.79021546e-01 6.96281223e-04 9.81089080e-01 5.48161037e-03
 1.00035233e+00 1.99948238e+00]


array([9.79021546e-01, 6.96281223e-04, 9.81089080e-01, 5.48161037e-03,
       1.00035233e+00, 1.99948238e+00])

In [6]:
def stochastic_jacobian(self, c, points):
    jac = np.ndarray(shape=(len(points), len(c)))
    for i in range(len(points)):
        jac[i] = -self.function.grad(points[i][1:], c).T[0]
    return jac

def stochastic_grad(self, c, batch_count=1):
    if not hasattr(self, "pos"):
        self.pos = 0
    batch = self.p[self.pos: self.pos + batch_count]
    self.pos = (self.pos + batch_count) % len(self.p)
    j = self.stochastic_jacobian(c, batch)
    return 2 * np.matmul(j.T, self.get_r(c, batch))

def grad_batch(f_batch_size, batch_size):
    def grad_help(*args):
        h = 1e-10
        dim = len(args)
        f = f_batch_size(batch_size)
        return [(
                        f(*[args[j] + (h if j == i else 0) for j in range(dim)])
                        -
                        f(*[args[j] - (h if j == i else 0) for j in range(dim)])
                ) / (2 * h)
                for i in range(dim)]
    return grad_help

def sgd_adam(batch_size, f, x, *, lr0, epoch, alpha, beta):
    points = np.zeros((epoch, 3))
    points[0] = x
    m = 0
    v = 0
    for i in range(1, epoch):
        g = np.array(grad_batch(f, batch_size)(*x))
        m = alpha * m + (1-alpha)*g
        v = beta * v + (1 - beta) * g**2

        m_ = m/(1-alpha)
        v_ = v/(1-beta)

        x = x - lr0*m_/(np.sqrt(v_) + 1e-5)
        points[i] = x
    return points

def regression(x, y, batch_size=1, method=sgd_adam, **config):
    if config == {}:
        config = {"lr0": 0.5, "d": 0.005, "epoch": 1000}
    x_mat = np.hstack((np.ones((x.shape[0], 1)), x))
    k = x_mat.shape[1]
    batch_choice = lambda batch_size: list(set(np.random.choice(np.arange(x.shape[0]), batch_size, replace=False)))
    f_batch_size = lambda batch_size: \
                       lambda *b, batch=batch_choice(batch_size): \
                           np.linalg.norm((y[batch] - x_mat[batch].dot(b)))
    bs = method(batch_size, f_batch_size, np.full(3, 1), **config)
    f = f_batch_size(x.shape[0])
    print(f'came close by {f(*bs[-1])}')
    ax = plt.figure().add_subplot()
    X = np.arange(len(bs))
    ax.plot(X, np.vectorize(f)(*bs.T))
    ax.grid()
    if len(x[0]) == 1:
        draw_2d(x, y, bs[-1])
    return bs[-1]

def draw_2d(x, y, bs):
    x = x.reshape(len(x))
    ax = plt.figure().add_subplot()
    ax.scatter(x, y)
    ax.grid(True)
    tmin = x.min() - 1
    tmax = x.max() + 1
    X = np.array([tmin, tmax])
    Y = (lambda z: bs[0] + bs[1] * z)(X)
    ax.add_line(mlines.Line2D(X, Y, color='green'))

def draw_polynom(x_poly, x_points, y_points=None):
    if y_points is None:
        y_points = x_poly(x_points)
    ax = plt.figure().add_subplot()
    ax.scatter(x_points, y_points)
    ax.grid()
    X = np.linspace(x_points.min(), x_points.max(), 100)
    ax.plot(X, x_poly(X))

def test_adam(coeffs, points, **config):
    coeffs = np.array(coeffs)
    points = np.array(points)
    test_poly = Poly(coeffs=coeffs)
    res = regression(points.reshape(-1,1), test_poly(points), **config)
    print(f'Adam result for {coeffs} is\n{res}')
    return res


def test_compare(coeffs, points, config_pdl, config_adam):
    coeffs = np.array(coeffs)
    points = np.array(points)
    pdl_poly = Poly(coeffs=test_pdl(coeffs, points, **config_pdl))
    draw_polynom(pdl_poly, points)
    adam_poly = Poly(coeffs=test_adam(coeffs, points, **config_adam))
    draw_polynom(adam_poly, points)

# f = Poly(coeffs=[0,0,1])
# x = np.array([-2,-1,0,1,2])
# draw_polynom(f, x, f(x))



regression(np.array([[1], [2], [3]]), np.array([2, 4, 6]), lr0=0.1, epoch=1000, alpha=0.35, beta=0.2)
# test_pdl([0, 2], [1, 2, 3], epoch=1000, tr0=0.1)

# test_compare([1,0,1], [-2,0,4]
#              , {'epoch': 100, 'tr0': 0.1}
#              , {'lr0': 0.1, 'epoch': 1000, 'alpha': 0.35, 'beta':0.2})

ValueError: shapes (1,2) and (3,) not aligned: 2 (dim 1) != 3 (dim 0)