# Фундаментальные концепции ИИ

## Лабораторная работа №1
### Градиентный спуск и его модификации

Выполнил студент группы М8О-109СВ-24 Кучев Антон

В качестве тестовых функций я использую функцию Шаффера №4 и функцию Бута

In [46]:
import matplotlib.pyplot as plt
from matplotlib import cm
from matplotlib.ticker import LinearLocator
import plotly.graph_objects as go
import numpy as np
import sympy as sy
import pandas as pd

In [None]:
X = np.arange(-10, 10, 0.25)
Y = np.arange(-10, 10, 0.25)
X, Y = np.meshgrid(X, Y)
Z = (X + 2 * Y - 7)**2 + (2 * X + Y - 5)**2

surf = go.Surface(z=Z, x=X, y=Y, colorscale='Viridis', opacity=0.7)


fig = go.Figure(data=[surf])
fig.show()

In [26]:
def visualize(df_coords, X, Y, Z, opacity=0.7):

    surf = go.Surface(z=Z, x=X, y=Y, colorscale='Viridis', opacity=opacity)
    points = go.Scatter3d(x=df_coords['x_c'].astype(float), y=df_coords['y_c'].astype(float), z=df_coords['z_c'].astype(float), marker=dict(size=1, color='green'))
    end_point = go.Scatter3d(x=df_coords['x_c'][-1:].astype(float), y=df_coords['y_c'][-1:].astype(float), z=df_coords['z_c'][-1:].astype(float), marker=dict(size=2, color='red'))
    fig = go.Figure(data=[surf, points, end_point])
    fig.show()

In [27]:
def euclidean_distance(x_cur, y_cur, x_prev, y_prev):
    return np.sqrt((x_cur - x_prev)**2 + (y_cur - y_prev)**2)    

In [30]:
from sympy import symbols, diff

x, y = symbols('x y', real=True)

def grad(f, c1, c2):
    grad_x_expr = f.diff(x).evalf(subs={x: c1, y: c2})
    grad_y_expr = f.diff(y).evalf(subs={x: c1, y: c2})

    grad_x = float(grad_x_expr)
    grad_y = float(grad_y_expr)
    return np.array([grad_x, grad_y])

def gd(f, alpha, bounds):
    coords = np.array([np.random.random() * np.random.randint(bounds[0], bounds[1]), np.random.random() * np.random.randint(bounds[0], bounds[1])])
    d = {'x_c': [coords[0]], 'y_c': [coords[1]], 'z_c': []}
    print("Point: ", coords)
    while 1:
        h = grad(f, coords[0], coords[1])
        coords -= alpha * h
        d['x_c'].append(coords[0])
        d['y_c'].append(coords[1])
        if euclidean_distance(d['x_c'][-1], d['y_c'][-1], d['x_c'][-2], d['y_c'][-2]) < 0.001:
            break
    return coords, d

booth_expr = (x + 2 * y - 7)**2 + (2 * x + y - 5)**2

result, d= gd(booth_expr, 0.1, [-10, 10])

d['z_c'] = [booth_expr.evalf(subs={x: d['x_c'][i], y: d['y_c'][i]}) for i in range(len(d['x_c']))]

print("Result: ", result)

Point:  [-0.73129653 -1.16406208]
Result:  [1.0004428 3.0001841]


In [31]:
df_coords = pd.DataFrame(data=d)

In [None]:
visualize(df_coords, X, Y, Z)

In [199]:
def nesterov_momentum(f, alpha, beta, bounds):
    coords = np.array([np.random.random() * np.random.randint(bounds[0], bounds[1]), np.random.random() * np.random.randint(bounds[0], bounds[1])])
    print("Point: ", coords)
    v = np.array([0., 0.])
    d = {'x_c': [coords[0]], 'y_c': [coords[1]], 'z_c': []}
    while 1:
        momentum_c = coords + beta * v
        h = grad(f, momentum_c[0] , momentum_c[1])
        v = beta * v - alpha * h
        coords += v
        d['x_c'].append(coords[0])
        d['y_c'].append(coords[1])
        if euclidean_distance(d['x_c'][-1], d['y_c'][-1], d['x_c'][-2], d['y_c'][-2]) < 0.001:
            break
    return coords, d

booth_expr = (x + 2 * y - 7)**2 + (2 * x + y - 5)**2

result, d = nesterov_momentum(booth_expr, 0.01, 0.9, [-10, 10])
d['z_c'] = [booth_expr.evalf(subs={x: d['x_c'][i], y: d['y_c'][i]}) for i in range(len(d['x_c']))]
df_coords = pd.DataFrame(data=d)
print("Result: ", result)

Point:  [ 0.         -0.15258311]
Result:  [0.98798003 3.01197129]


In [200]:
visualize(df_coords, X, Y, Z)

In [203]:
def adam(f, alpha, beta1, beta2, bounds):
    coords = np.array([np.random.random() * np.random.randint(bounds[0], bounds[1]), np.random.random() * np.random.randint(bounds[0], bounds[1])])
    print("Point: ", coords)
    v = np.array([0., 0.])
    G = np.array([0., 0.])
    d = {'x_c': [coords[0]], 'y_c': [coords[1]], 'z_c': []}
    while 1:
        h = grad(f, coords[0] , coords[1])
        v = beta1 * v + (1 - beta1) * h
        G = beta2 * G + (1 - beta2) * h**2
        coords -= alpha / np.sqrt(G + 1e-8) * v
        d['x_c'].append(coords[0])
        d['y_c'].append(coords[1])
        if euclidean_distance(d['x_c'][-1], d['y_c'][-1], d['x_c'][-2], d['y_c'][-2]) < 0.001:
            break
    return coords, d

booth_expr = (x + 2 * y - 7)**2 + (2 * x + y - 5)**2

result, d = adam(booth_expr, 0.5, 0.9, 0.99, [-10, 10])
d['z_c'] = [booth_expr.evalf(subs={x: d['x_c'][i], y: d['y_c'][i]}) for i in range(len(d['x_c']))]
df_coords = pd.DataFrame(data=d)

print("Result: ", result)

Point:  [-2.10909884 -0.8439879 ]
Result:  [0.98515868 2.98742312]


In [204]:
visualize(df_coords, X, Y, Z)

In [279]:
def gd_with_lr_evolution(f, alpha, bounds):
    coords = np.array([np.random.random() * np.random.randint(bounds[0], bounds[1]), np.random.random() * np.random.randint(bounds[0], bounds[1])])
    d = {'x_c': [coords[0]], 'y_c': [coords[1]], 'z_c': []}
    print("Point: ", coords)
    decay_coeff = 0.9
    wait = 0
    while 1:
        h = grad(f, coords[0], coords[1])
        coords -= alpha * h
        alpha = alpha * (decay_coeff**(len(d['x_c'])))
        d['x_c'].append(coords[0])
        d['y_c'].append(coords[1])
        if euclidean_distance(d['x_c'][-1], d['y_c'][-1], d['x_c'][-2], d['y_c'][-2]) < 0.001:
            wait += 1
            if wait == 3:
                wait = 0
                alpha *= decay_coeff
        if euclidean_distance(d['x_c'][-1], d['y_c'][-1], d['x_c'][-2], d['y_c'][-2]) < 0.00001:
            break
    return coords, d

booth_expr = (x + 2 * y - 7)**2 + (2 * x + y - 5)**2

result, d = gd_with_lr_evolution(booth_expr, 0.1, [-10, 10])
d['z_c'] = [booth_expr.evalf(subs={x: d['x_c'][i], y: d['y_c'][i]}) for i in range(len(d['x_c']))]
df_coords = pd.DataFrame(data=d)

print("Result: ", result)

Point:  [-1.57544175 -9.84003043]
Result:  [3.21140263 0.80514897]


In [None]:
visualize(df_coords, X, Y, Z)

In [None]:
X = np.arange(-100, 100, 0.25)
Y = np.arange(-100, 100, 0.25)
X, Y = np.meshgrid(X, Y)
Z = 0.5 + (np.cos(np.sin(np.abs(X**2 - Y**2)))**2 - 0.5) / (1 + 0.001 * (X**2 + Y**2))**2

surf = go.Surface(z=Z, x=X, y=Y, colorscale='Viridis', opacity=0.1)


fig = go.Figure(data=[surf])
fig.show()

In [206]:
shaffer_expr = 0.5 + (sy.cos(sy.sin(sy.Abs(x**2 - y**2)))**2 - 0.5) / (1 + 0.001 * (x**2 + y**2))**2

result, d = gd(shaffer_expr, 0.15, [-100, 100])
d['z_c'] = [shaffer_expr.evalf(subs={x: d['x_c'][i], y: d['y_c'][i]}) for i in range(len(d['x_c']))]
df_coords = pd.DataFrame(data=d)

print("Result:", result)

Point:  [  0.         -30.05675414]
Result: [  0.        -26.9091988]


In [None]:
visualize(df_coords, X, Y, Z, 0.01)

In [210]:
shaffer_expr = 0.5 + (sy.cos(sy.sin(sy.Abs(x**2 - y**2)))**2 - 0.5) / (1 + 0.001 * (x**2 + y**2))**2

result, d = nesterov_momentum(shaffer_expr, 0.2, 0.8, [-100, 100])
d['z_c'] = [shaffer_expr.evalf(subs={x: d['x_c'][i], y: d['y_c'][i]}) for i in range(len(d['x_c']))]
df_coords = pd.DataFrame(data=d)

print("Result:", result)


Point:  [-25.46620489   6.86996215]
Result: [-129.75155331    0.61000115]


In [None]:
visualize(df_coords, X, Y, Z, 0.01)

In [212]:
shaffer_expr = 0.5 + (sy.cos(sy.sin(sy.Abs(x**2 - y**2)))**2 - 0.5) / (1 + 0.001 * (x**2 + y**2))**2

result, d = adam(shaffer_expr, 0.5, 0.9, 0.99, [-100, 100])
d['z_c'] = [shaffer_expr.evalf(subs={x: d['x_c'][i], y: d['y_c'][i]}) for i in range(len(d['x_c']))]
df_coords = pd.DataFrame(data=d)

print("Result:", result)


Point:  [-12.85325582  37.21407512]
Result: [-0.34173371 53.55768811]


In [None]:
visualize(df_coords, X, Y, Z, 0.01)

In [282]:
result, d = gd_with_lr_evolution(shaffer_expr, 0.1, [-100, 100])
d['z_c'] = [shaffer_expr.evalf(subs={x: d['x_c'][i], y: d['y_c'][i]}) for i in range(len(d['x_c']))]
df_coords = pd.DataFrame(data=d)

print("Result: ", result)

Point:  [ 2.68175601 -0.73212017]
Result:  [ 2.87640618 -0.65728775]


In [None]:
visualize(df_coords, X, Y, Z, 0.01)