In [1]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.animation as animation
from matplotlib.animation import FuncAnimation
import pandas as pd
import math
import scipy.stats
from IPython.display import HTML
import tqdm
%matplotlib inline

In [2]:
#勾配と偏微分
h = 1.0 * 10**-7
def grad_f(x,y):
    grad_x = (f(x + h, y) - f(x, y)) / h
    grad_y = (f(x, y + h) - f(x, y)) / h
    return [grad_x, grad_y]

def partial_grad_f(x,y,index):
    if index == 0:
        grad_x = (f(x + h, y) - f(x, y)) / h
        grad_y = 0
    else:
        grad_x = 0
        grad_y = (f(x, y + h) - f(x, y)) / h
    return [grad_x, grad_y]
    


In [3]:
#パラメータ更新
def optimize(x,y,step_num = 50, learning_rate=1e-2,momentum=0.8, mode ="GD"):
    start = (x,y)
    points = []
    if mode == "GD":
        for i in range(step_num+1):
            x,y = start
            gradient = grad_f(x,y)
            x_new = x - learning_rate * gradient[0]
            y_new = y - learning_rate * gradient[1]
            points.append([x,y,f(x,y)])
            start = (x_new,y_new)
    elif mode == "momentum":
        v_x, v_y = 0, 0 # the momentum point
        for i in range(step_num+1):
            x,y = start
            gradient = grad_f(x, y)
            v_x = momentum * v_x - learning_rate * gradient[0]
            v_y = momentum * v_y - learning_rate * gradient[1]
            x_new = x + v_x
            y_new = y + v_y
            points.append([x,y,f(x,y)])
            start = (x_new,y_new)
    elif mode == "ACDM":
        gamma = 1/8.0
        c= 1/8.0
        v_x, v_y = x, y # the momentum point
        for i in range(step_num+1):
            gamma_temp = gamma
            gamma = math.sqrt(c**2+gamma_temp**2) + c
            alpha = gamma/(4.0*gamma_temp**2+gamma)
            random = scipy.stats.bernoulli.rvs(0.5)
            x,y = start
            v_x_old, v_y_old = v_x, v_y
            gradient = partial_grad_f(alpha*v_x+(1.0-alpha)*x, alpha*v_y+(1.0-alpha)*y, random)
            x_new = alpha*v_x+(1.0-alpha)*x - learning_rate * gradient[0]
            y_new = alpha*v_y+(1.0-alpha)*y - learning_rate * gradient[1]
            v_x = v_x - gamma*learning_rate * gradient[0]
            v_y = v_y - gamma*learning_rate * gradient[1]
            points.append([x,y,f(x,y),v_x_old, v_y_old,alpha*v_x_old+(1.0-alpha)*x, alpha*v_y_old+(1.0-alpha)*y])
            start = (x_new,y_new)
    elif mode == "exact_ACDM": #証明済みバージョン
        gamma = 1/8.0
        c= 1/8.0
        v_x, v_y = x, y # the momentum point
        for i in range(step_num+1):
            gamma_temp = gamma
            gamma = math.sqrt(c**2+gamma_temp**2) + c
            alpha = gamma/(4.0*gamma_temp**2+gamma)
            random = scipy.stats.bernoulli.rvs(0.5)
            x,y = start
            v_x_old, v_y_old = v_x, v_y
            gradient = partial_grad_f(alpha*v_x+(1.0-alpha)*x, alpha*v_y+(1.0-alpha)*y, random)
            exact_gradient = gradient
            x_exact, y_exact = alpha*v_x+(1.0-alpha)*x, alpha*v_y+(1.0-alpha)*y
            while abs(exact_gradient[random]) > 1: #偏微分係数の値が1.0*10^-3を下回るまでやる。
                exact_gradient = partial_grad_f(x_exact, y_exact, random)
                x_exact, y_exact = x_exact - learning_rate* exact_gradient[0], y_exact -  learning_rate* exact_gradient[1]
            x_new = x_exact
            y_new = y_exact
            v_x = v_x - gamma*learning_rate * gradient[0]
            v_y = v_y - gamma*learning_rate * gradient[1]
            points.append([x,y, f(x,y),v_x_old, v_y_old,alpha*v_x_old+(1.0-alpha)*x, alpha*v_y_old+(1.0-alpha)*y]) #alphaは正しい値ではない。
            start = (x_new,y_new)
    elif mode == "exact_ACDM2":
        gamma = 1/8.0
        c= 1/8.0
        v_x, v_y = x, y # the momentum point
        for i in range(step_num+1):
            gamma_temp = gamma
            gamma = math.sqrt(c**2+gamma_temp**2) + c
            alpha = gamma/(4.0*gamma_temp**2+gamma)
            random = scipy.stats.bernoulli.rvs(0.5)
            x,y = start
            v_x_old, v_y_old = v_x, v_y
            gradient = partial_grad_f(alpha*v_x+(1.0-alpha)*x, alpha*v_y+(1.0-alpha)*y, random)
            exact_gradient = gradient
            x_exact, y_exact = alpha*v_x+(1.0-alpha)*x, alpha*v_y+(1.0-alpha)*y
            while abs(exact_gradient[random]) > 0.01: #偏微分係数の値が1.0*10^-3を下回るまでやる。
                exact_gradient = partial_grad_f(x_exact, y_exact, random)
                x_exact, y_exact = x_exact - learning_rate * exact_gradient[0], y_exact - learning_rate * exact_gradient[1]
                v_x = v_x - gamma*learning_rate * exact_gradient[0]
                v_y = v_y - gamma*learning_rate * exact_gradient[1]         
            x_new = x_exact
            y_new = y_exact
            points.append([x,y,f(x,y),v_x_old, v_y_old,alpha*v_x_old+(1.0-alpha)*x, alpha*v_y_old+(1.0-alpha)*y])
            start = (x_new,y_new)
        
    else:
        print("Error")
    return points
        
        
            



    
    

In [6]:
GD_show = False
momentum_show = False
ACDM_show = False
exact_ACDM_show = True
exact_ACDM_show2 = False

#グラフ関係の設定
fig = plt.figure()
ax = fig.add_subplot(111, aspect=1)
x = np.linspace(-100, 100, 100)
y = np.linspace(-100, 100, 100).reshape(-1, 1)
#扱う関数
def f(x, y):
    return x**2 + y**2 + 50*x
#     return np.exp(x)/(np.exp(x)+np.exp(y))
#     return x**4+y**4
#ハイパーパラメータ設定
hyperparameters={"x":50,
"y":20,
"step_num" : 20,
"learning_rate":1e-2,
"momentum":0.8}

GD_points = optimize(mode="GD", **hyperparameters)
momentum_points = optimize(mode="momentum", **hyperparameters)
ACDM_points = optimize(mode="ACDM",**hyperparameters)
exact_ACDM_points = optimize(mode="exact_ACDM",**hyperparameters)
exact_ACDM_points2 = optimize(mode="exact_ACDM2",**hyperparameters)
def update(z):
    ax.cla() # ax をクリア
    ax.pcolormesh(x,y,f(x,y))
    if GD_show:
        ax.plot(GD_points[z][0],GD_points[z][1], "o", c="red", label="GD")
    if momentum_show:
        ax.plot(momentum_points[z][0],momentum_points[z][1], "o", c="blue", label="momentum")
    if ACDM_show:
        ax.plot(ACDM_points[z][0],ACDM_points[z][1], "o", c="green",label="ACDM_x")
        ax.plot(ACDM_points[z][3],ACDM_points[z][4], "o", c="blue",label="ACDM_v")
        ax.plot(ACDM_points[z][5],ACDM_points[z][6], "o", c="red",label="ACDM_y")
    if exact_ACDM_show:
        ax.plot(exact_ACDM_points[z][0],exact_ACDM_points[z][1], "o", c="green",label="exact_ACDM_x")
        ax.plot(exact_ACDM_points[z][3],exact_ACDM_points[z][4], "o", c="blue",label="exact_ACDM_v")
        ax.plot(exact_ACDM_points[z][5],exact_ACDM_points[z][6], "o", c="red",label="exact_ACDM_y")
    if exact_ACDM_show2:
        ax.plot(exact_ACDM_points2[z][0],exact_ACDM_points2[z][1], "o", c="green",label="exact_ACDM2_x")
        ax.plot(exact_ACDM_points2[z][3],exact_ACDM_points2[z][4], "o", c="blue",label="exact_ACDM2_v")
        ax.plot(exact_ACDM_points2[z][5],exact_ACDM_points2[z][6], "o", c="red",label="exact_ACDM2_y")
    ax.legend(loc="upper left")
anim = FuncAnimation(fig, update, frames=np.arange(hyperparameters["step_num"]), interval=200)

#anim.save("c03.gif", writer="imagemagick")
plt.close()
HTML(anim.to_jshtml())

In [7]:
x = [i for i in range(len(GD_points))]
GD_points = pd.DataFrame(data=GD_points, columns=["x","y","loss"])
momentum_points = pd.DataFrame(data=momentum_points, columns=["x","y","loss"])
ACDM_points = pd.DataFrame(data=ACDM_points, columns=["x","y","loss"])
exact_ACDM_points = pd.DataFrame(data=exact_ACDM_points, columns=["x","y","loss"])
plt.plot(x, GD_points["loss"],label="GD")
plt.plot(x, momentum_points["loss"],label="momentum")
plt.plot(x, ACDM_points["loss"],label="ACDM")
plt.plot(x, exact_ACDM_points["loss"],label="exact_ACDM")
plt.legend(loc="best")
plt.show()

ValueError: 3 columns passed, passed data had 7 columns