In [1]:
import numpy as np
from matplotlib import pyplot as plt
from typing import Tuple, Mapping
import numba
import timeit

In [3]:
import numpy as np
from matplotlib import pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
plt.ioff()
a = np.arange(-10,10,0.1)
b = np.arange(-10,10,0.1)
xgrid, ygrid = np.meshgrid(a, b)

'''
Функция Бута
'''

def func_but(x, y):
    return (x + 2 * y - 7) * (x + 2 * y - 7) + (2 * x + y - 5) * (2 * x + y - 5) # исходная функция 

def dx_but(x, y):
    return 10 * x + 8 * y - 34 # производная по х

def dy_but(x, y):
    return 10 * y + 8 * x - 38 # производня по у

In [4]:
'''
Реализация шедулера
'''

class lr_scheduler():
    def __init__(self, step_size:int=5000, gamma:float=0.1, lr:float=0.1):
        '''
        step_size -- количество итераций необходимое для уменьшения скорости обучения
        gamma -- коэффициент на который домнажается скорость обучеия каждые step_size шагов
        lr -- скорость обучения
        '''
        self.iter  = 0 # переменная для подсчета шагов
        self.step_size = step_size
        self.gamma = gamma
        self.lr = lr

    def step(self):
        self.iter += 1
        if self.iter == self.step_size: # когда число итераций становится равно заданному порогу -> уменьшаем скорость обучения
            self.lr = self.lr * self.gamma
            self.iter = 0
        return self.lr

In [5]:
def GD(f:Mapping, dx:Mapping, dy:Mapping, x0:np.ndarray, y0:np.ndarray, lr_start:float=0.001, iter=100, thr:np.ndarray=None):
    '''
        f -- исходная функция
        dx -- производня по х от исходной функции
        dy -- производня по у от исходной функции
        x0 -- стартовая точка по координате х
        y0 -- стартовая точка по координате у
        lr_start -- изначальное значение скорости обучения
        iter -- число итераций выполнения алгоритма
        thr -- точность вычислений 
    '''
    x_old = x0.copy()
    y_old = y0.copy()
    
    scheduler = lr_scheduler(lr=lr_start)
    for i in range(iter):
        if np.linalg.norm(dx(x_old, y_old)) > thr[0] and np.linalg.norm(dy(x_old, y_old)) > thr[0]:
            gradX = dx(x_old, y_old)
            gradY = dy(x_old, y_old)
            x_old = x_old - scheduler.lr * gradX
            y_old = y_old - scheduler.lr * gradY
            scheduler.step()
    plt.show()
    return x_old, y_old

In [6]:
a = np.arange(-10,10,0.1)
b = np.arange(-10,10,0.1)
xgrid, ygrid = np.meshgrid(a, b)

### Работа обычного алгоритма

In [7]:
%%timeit
x, y = GD(f=func_but, dx=dx_but, dy=dy_but, x0=np.array([-1]), y0=np.array([-1]), iter=100, thr=np.array([10]))
print(x, y)
func_but(x, y)

[1.20290531] [1.51586288]
[1.20290531] [1.51586288]
[1.20290531] [1.51586288]
[1.20290531] [1.51586288]
[1.20290531] [1.51586288]
[1.20290531] [1.51586288]
[1.20290531] [1.51586288]
[1.20290531] [1.51586288]
[1.20290531] [1.51586288]
[1.20290531] [1.51586288]
[1.20290531] [1.51586288]
[1.20290531] [1.51586288]
[1.20290531] [1.51586288]
[1.20290531] [1.51586288]
[1.20290531] [1.51586288]
[1.20290531] [1.51586288]
[1.20290531] [1.51586288]
[1.20290531] [1.51586288]
[1.20290531] [1.51586288]
[1.20290531] [1.51586288]
[1.20290531] [1.51586288]
[1.20290531] [1.51586288]
[1.20290531] [1.51586288]
[1.20290531] [1.51586288]
[1.20290531] [1.51586288]
[1.20290531] [1.51586288]
[1.20290531] [1.51586288]
[1.20290531] [1.51586288]
[1.20290531] [1.51586288]
[1.20290531] [1.51586288]
[1.20290531] [1.51586288]
[1.20290531] [1.51586288]
[1.20290531] [1.51586288]
[1.20290531] [1.51586288]
[1.20290531] [1.51586288]
[1.20290531] [1.51586288]
[1.20290531] [1.51586288]
[1.20290531] [1.51586288]
[1.20290531]

### Ускорение при помощи numba

In [10]:
from numpy.linalg import norm

@numba.njit(fastmath=True)
def f(x, y):
    return (x + 2 * y - 7) * (x + 2 * y - 7) + (2 * x + y - 5) * (2 * x + y - 5) # исходная функция 

@numba.njit(fastmath=True)
def dx(x, y):
    return 10 * x + 8 * y - 34 # производная по х

@numba.njit(fastmath=True)
def dy(x, y):
    return 10 * y + 8 * x - 38 # производня по у

@numba.njit(fastmath=True)
def GD(x0:np.ndarray, y0:np.ndarray, lr_start:float=0.001, iter:np.ndarray=100, thr:np.ndarray=None, step_size:int=5000, gamma:float=0.1):
    '''
        x0 -- стартовая точка по координате х
        y0 -- стартовая точка по координате у
        lr_start -- изначальное значение скорости обучения
        iter -- число итераций выполнения алгоритма
        thr -- точность вычислений 
    '''
    x_old = x0.copy()
    y_old = y0.copy()
    t = thr.copy()
    iteration = 0
    while iter[0] > 0:
        a = np.array([dx(x_old[0], y_old[0])])
        if norm(a) > thr[0]:
            gradX = dx(x_old, y_old)
            gradY = dy(x_old, y_old)
            x_old = x_old - lr_start * gradX
            y_old = y_old - lr_start * gradY
            iteration += 1
            if iter == step_size:
                iteration = 0
                lr_start = lr_start * gamma
        iter -= 1
    return x_old, y_old

In [9]:
a = np.arange(-10,10,0.1)
b = np.arange(-10,10,0.1)
xgrid, ygrid = np.meshgrid(a, b)

In [11]:
%%timeit
x, y = GD(x0=np.array([-1.0]), y0=np.array([-1.0]), iter=np.array([100]), thr=np.array([10.0]))
print(x, y)
f(x, y)

[1.20290531] [1.51586288]
[1.20290531] [1.51586288]
[1.20290531] [1.51586288]
[1.20290531] [1.51586288]
[1.20290531] [1.51586288]
[1.20290531] [1.51586288]
[1.20290531] [1.51586288]
[1.20290531] [1.51586288]
The slowest run took 5.62 times longer than the fastest. This could mean that an intermediate result is being cached.
1.55 ms ± 1.14 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
