# Optimizers Tensorflow

# Simple Demo 1D SGD with animation

In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

import numpy as np

import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib import animation, rc
%matplotlib widget
%matplotlib notebook

# --- Static 1D demo: run plain SGD on loss(w) = w**2 / 2 and plot the visited points ---
opt = tf.keras.optimizers.SGD(learning_rate=0.1)
var = tf.Variable(5.0)
loss = lambda: (var ** 2)/2.0         # d(loss)/d(var) = var
tt = []  # value of `var` after each optimizer step
for j in range(100):
    # The value returned by minimize() is not needed; calling .numpy() on it
    # is unnecessary and would fail if minimize() returns None.
    opt.minimize(loss, [var])
    tt.append(var.numpy())


# The same loss written as a plain function of its argument, used for plotting.
lossf = lambda x: (x ** 2)/2.0         # d(lossf)/dx = x

# Loss curve over [-6, 6] and the loss at every visited point.
x = list(np.linspace(-6, 6.0, 100))
y = [lossf(xi) for xi in x]
ll = [lossf(t) for t in tt]

fig, ax = plt.subplots()
ax.plot(x, y)
ax.scatter(tt, ll)

# State consumed by the animation further down in this cell.
xx = []
x_sgd = tf.Variable(5.0)
def animate_optimizer(frame, opt, loss, lossf, x_sgd, xx, p2, p3):
    """Advance the optimizer by one step and refresh the 1D animation artists.

    Args:
        frame: frame index supplied by FuncAnimation (not used here).
        opt: optimizer exposing ``minimize(loss, var_list=...)``.
        loss: zero-argument callable minimised with respect to ``x_sgd``.
        lossf: one-argument callable giving the loss at a plain number.
        x_sgd: the variable being optimised; read through ``.numpy()``.
        xx: list of visited positions; extended in place.
        p2: line artist showing every visited (position, loss) point.
        p3: line artist showing only the current point.

    Returns:
        ``(p2, p3)`` -- the artists modified in this frame.
    """
    # The value returned by minimize() is not needed here.
    opt.minimize(loss, var_list=[x_sgd])
    current = x_sgd.numpy()
    # Pass one-element sequences to set_data rather than bare scalars.
    p3.set_data([current], [lossf(current)])
    xx.append(current)
    p2.set_data(xx, [lossf(xt) for xt in xx])
    # Return artists only: with blit=True every returned item is treated as
    # an artist, so the optimised variable must not be part of the result.
    return p2, p3


# New figure for the animation: the static loss curve plus the artists that
# are updated on every frame. (The former bare `plt.clf` was never called and
# therefore did nothing; it has been dropped.)
fig, ax = plt.subplots()
ax.plot(x, y)
ax.set_yticklabels([])
ax.set_xticklabels([])
ax.set_xlabel('w1')
ax.set_ylabel('Loss')
ax.set_title('Gradient Descent')

## 1D single-weight loss plot

x_sgd = tf.Variable(5.0)
p1, = ax.plot([x_sgd.numpy()], [lossf(x_sgd.numpy())], 'k')
# Trail of visited points. The 'r.' format string already selects the '.'
# marker, so it is not repeated through the `marker` keyword.
p2, = ax.plot([], [], 'r.', alpha=.5)

# Larger marker for the current point.
p3, = ax.plot([], [], 'r.', alpha=.5, ms=10)
max_iter = 100
loss = lambda: (x_sgd ** 2)/2.0

anim2 = animation.FuncAnimation(fig, animate_optimizer,
                                frames=range(0, max_iter), fargs=(opt, loss, lossf, x_sgd, xx, p2, p3), blit=True, interval=50, repeat=True, repeat_delay=20)
filename = '/home/ronen/Downloads/sgd_1d_intro.gif'
anim2.save(filename, dpi=80, writer='imagemagick', fps=5)







# Simple Demo SGD with 2D Contour animation

In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

import numpy as np

import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib import animation, rc
%matplotlib widget
%matplotlib notebook

# Plain SGD optimizer with a fixed learning rate; passed to the animation below.
opt = tf.keras.optimizers.SGD(learning_rate=0.1)
# NOTE(review): `var` and `loss` are not referenced again within this cell.
var = tf.Variable(5.0)
loss = lambda: (var ** 2)/2.0         # d(loss)/d(var) = var


#### Now contour 2 weights:


def plot_contour(loss_with_args):
    """Draw black contour lines of a two-argument loss on a new figure.

    ``loss_with_args`` is called once with the two meshgrid arrays covering
    [-5.5, 5.5] on both axes. Returns the ``(fig, ax)`` pair that was created.
    """
    axis_samples = np.linspace(-5.5, 5.5, 100)
    grid_x, grid_y = np.meshgrid(axis_samples, axis_samples)
    # 0.1, 1, 2 followed by the squares 2**2 .. 17**2 (4 .. 289).
    levels = [0.1, 1, 2] + [k * k for k in range(2, 18)]
    heights = loss_with_args(grid_x, grid_y)
    fig, ax = plt.subplots()
    ax.contour(grid_x, grid_y, heights, levels, colors='black')
    return fig, ax

def animate_optimizer(frame, opt, loss_func, xxx, yyy, xx, yy, p2, p3, p4):
    """Advance the optimizer by one step and refresh the 2D contour artists.

    Args:
        frame: frame index supplied by FuncAnimation (not used here).
        opt: optimizer exposing ``minimize(loss, var_list=...)``.
        loss_func: zero-argument callable minimised w.r.t. ``xxx`` and ``yyy``.
        xxx, yyy: the two variables being optimised; read through ``.numpy()``.
        xx, yy: lists of visited coordinates; extended in place.
        p2, p4: line artists that both receive the full visited path.
        p3: line artist showing only the current point.

    Returns:
        ``(p2, p3, p4)`` -- the artists modified in this frame.
    """
    # The value returned by minimize() is not needed here.
    opt.minimize(loss_func, var_list=[xxx, yyy])

    x_now, y_now = xxx.numpy(), yyy.numpy()
    # Pass one-element sequences to set_data rather than bare scalars.
    p3.set_data([x_now], [y_now])
    xx.append(x_now)
    yy.append(y_now)
    p2.set_data(xx, yy)
    p4.set_data(xx, yy)
    # Return every modified artist (and nothing else): with blit=True each
    # returned item is treated as an artist.
    return p2, p3, p4


# Loss surface shown as contours. (The former bare `plt.clf` was never called
# and therefore did nothing; it has been dropped.)
loss_with_args = lambda x, y: (x ** 2)/2.0  +(y ** 2)/2.0

fig1, ax = plot_contour(loss_with_args)

ax.set_yticklabels([])
ax.set_xticklabels([])
ax.set_xlabel('w1')
ax.set_ylabel('w2')
ax.set_title('Gradient Descent - 2 Weights Loss Contours')

max_iter = 100
# Starting point of the descent and the lists that collect the visited path.
xxx = tf.Variable(5.0)
yyy = tf.Variable(2.0)
xx = []
yy = []

# Mark the actual starting point (previously the marker was drawn at (-5, -2)
# although the descent starts at (5, 2)).
p1, = ax.plot([xxx.numpy()], [yyy.numpy()], 'kx')
# 'r.' already selects the '.' marker, so `marker` is not passed again.
p2, = ax.plot([], [], 'r.')
p4, = ax.plot([], [], 'r', marker='.', alpha=.5)
p3, = ax.plot([], [], 'r.')
# Same loss as `loss_with_args`, closed over the variables being optimised.
loss_func = lambda: (xxx ** 2)/2.0  + (yyy ** 2)/2.0

anim2 = animation.FuncAnimation(fig1, animate_optimizer, frames=range(0, max_iter), fargs=(opt, loss_func, xxx, yyy, xx, yy, p2, p3, p4), blit=True, interval=50, repeat=True, repeat_delay=20)
filename = 'sgd_2d_contour_intro.gif'
anim2.save('/home/ronen/Downloads/'+filename, dpi=80, writer='imagemagick', fps=5)







# 3d SGD demo animation

In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

import numpy as np

import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib import animation, rc
import mpl_toolkits.mplot3d.axes3d as p3
%matplotlib widget
%matplotlib notebook


def animate_optimizer_3d(frame, opt, loss_func, loss_func_args, xxx, yyy, xx, yy, zz, ax, p2, p3, p4):
    """Advance the optimizer by one step and refresh the 3D animation artists.

    Args:
        frame: frame index supplied by FuncAnimation; shown in the title.
        opt: optimizer exposing ``minimize(loss, var_list=...)``.
        loss_func: zero-argument callable minimised w.r.t. ``xxx`` and ``yyy``.
        loss_func_args: two-argument callable giving the loss (z value) at a point.
        xxx, yyy: the two variables being optimised; read through ``.numpy()``.
        xx, yy, zz: lists of visited coordinates; extended in place.
        ax: axes whose title is updated with the frame number.
        p2, p4: 3D line artists that both receive the full visited path.
        p3: 3D line artist showing only the current point.

    Returns:
        ``(p2, p3, p4)`` -- the artists modified in this frame.
    """
    # The value returned by minimize() is not needed here.
    opt.minimize(loss_func, var_list=[xxx, yyy])
    ax.title.set_text('3D Plot, Frame={}'.format(frame))

    x_now, y_now = xxx.numpy(), yyy.numpy()
    z_now = loss_func_args(x_now, y_now)

    # Pass one-element sequences rather than bare scalars.
    p3.set_data([x_now], [y_now])
    p3.set_3d_properties([z_now])

    # Record the new point before drawing the path so that the path includes
    # the current position, as the 2D animation callback does.
    xx.append(x_now)
    yy.append(y_now)
    zz.append(z_now)

    for path_artist in (p2, p4):
        path_artist.set_data(np.array(xx), np.array(yy))
        path_artist.set_3d_properties(np.array(zz))

    # Return artists only: with blit=True every returned item is treated as
    # an artist, so the optimised variables must not be part of the result.
    return p2, p3, p4


def plot_loss_func_3d(loss_func_args):
    """Draw a semi-transparent 3D surface of a two-argument loss.

    Args:
        loss_func_args: callable evaluated once on the two meshgrid arrays
            covering [-5.5, 5.5] on both axes.

    Returns:
        The ``(fig, ax)`` pair that was created; ``ax`` is a 3D axes.
    """
    xx = np.linspace(-5.5, 5.5, 100)
    yy = np.linspace(-5.5, 5.5, 100)
    X, Y = np.meshgrid(xx, yy)
    Z = loss_func_args(X, Y)
    fig1 = plt.figure(figsize=(16, 6))
    # Create the 3D axes with add_subplot: passing `projection` to
    # Figure.gca() is not accepted by newer Matplotlib releases.
    ax1 = fig1.add_subplot(111, projection='3d')

    ax1.plot_surface(X, Y, Z, rstride=1, cstride=1, alpha=0.5,
                     linewidth=0, antialiased=False)

    ax1.set_xlabel('w1', fontsize=20)
    ax1.set_ylabel('w2', fontsize=20)
    ax1.set_zlabel('J(b, w)', fontsize=20)
    # Hide the tick labels on all three axes.
    ax1.xaxis.set_ticklabels([])
    ax1.yaxis.set_ticklabels([])
    ax1.zaxis.set_ticklabels([])
    return fig1, ax1



# Two renderings of the same descent: the default camera, then a view with
# elev=90. Each run gets its own output file; previously both entries used
# the same filename, so the second GIF overwrote the first.
run_params = [
    {'filename': 'sgd_3d_contour_intro.gif'},
    {'filename': 'sgd_3d_contour_intro_top_view.gif', 'elev': 90},
]
opt = tf.keras.optimizers.SGD(learning_rate=0.1)

max_iter = 100

for params in run_params:

    # Fresh starting point and empty path for every run.
    xxx = tf.Variable(5.0)
    yyy = tf.Variable(5.0)
    xx = []
    yy = []
    zz = []
    filename = params['filename']
    loss_func_args = lambda xxx, yyy: (xxx ** 2)/2.0  + (yyy ** 2)/2.0
    # Same loss, closed over the variables being optimised.
    loss_func = lambda: (xxx ** 2)/2.0  + (yyy ** 2)/2.0

    fig, ax = plot_loss_func_3d(loss_func_args)
    title = ax.set_title('3D Top View')
    x0, y0 = xxx.numpy(), yyy.numpy()
    z = loss_func_args(x0, y0)
    # NOTE(review): the name `p3` also rebinds the `p3` alias created by
    # `import mpl_toolkits.mplot3d.axes3d as p3` at the top of this cell.
    p3, = ax.plot([x0], [y0], [z], 'ro')
    p2, = ax.plot(np.array([x0]), np.array([y0]), np.array([z]))
    p4, = ax.plot(np.array([x0]), np.array([y0]), np.array([z]), 'r', alpha=.5)
    if 'elev' in params:
        # Read the elevation from the run parameters (a bare `elev` name was
        # never defined and raised NameError).
        ax.view_init(azim=0, elev=params['elev'])
    anim2 = animation.FuncAnimation(fig, animate_optimizer_3d, frames=range(0, max_iter), fargs=(opt, loss_func, loss_func_args, xxx, yyy, xx, yy, zz, ax, p2, p3, p4), blit=True, interval=30, repeat=True, repeat_delay=20)
    anim2.save('/home/ronen/Downloads/'+filename, dpi=80, writer='imagemagick', fps=5)





# SGD and momentum together - no animation #

In [None]:
# Gradient Descent with large step size with animation

import matplotlib.pyplot as plt
import numpy as np
import matplotlib.animation as animation

# %matplotlib widget
# %matplotlib notebook



import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

import numpy as np

import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib import animation, rc
%matplotlib widget
%matplotlib notebook

# Coefficients of the quadratic loss X_COEF*x**2 + Y_COEF*y**2 defined below.
X_COEF=1
Y_COEF=40
# Learning rate passed to every optimizer created in this cell.
alpha = 0.02517
# NOTE(review): x_offset / y_offset are not referenced anywhere in this cell.
x_offset = 2
y_offset = 1.5


# Loss as a function of two arguments; used to compute the contour heights.
loss_with_args = lambda xxx, yyy: X_COEF*(xxx)**2 + Y_COEF*(yyy)**2         # gradient: (2*X_COEF*xxx, 2*Y_COEF*yyy)
# NOTE(review): this zero-argument version reads module-level `xxx`/`yyy`, which this
# cell does not define; do_optimize() builds its own loss closure instead.
loss = lambda: X_COEF*(xxx)**2 + Y_COEF*(yyy)**2         # gradient: (2*X_COEF*xxx, 2*Y_COEF*yyy)


def plot_contour():
    """Draw black contour lines of the module-level ``loss_with_args``.

    The loss is evaluated on a 100x100 grid covering [-5.5, 5.5] on both
    axes. Returns the ``(fig, ax)`` pair that was created.
    """
    axis_samples = np.linspace(-5.5, 5.5, 100)
    grid_x, grid_y = np.meshgrid(axis_samples, axis_samples)
    # 0.1, 1, 2 followed by the squares 2**2 .. 17**2 (4 .. 289).
    levels = [0.1, 1, 2] + [k * k for k in range(2, 18)]
    heights = loss_with_args(grid_x, grid_y)
    fig, ax = plt.subplots()
    ax.contour(grid_x, grid_y, heights, levels, colors='black')
    return fig, ax



def do_optimize(opt, num_iterations, name):
    """Run ``opt`` from (-5, -2) on the quadratic loss and plot its path.

    Args:
        opt: optimizer exposing ``minimize(loss, var_list=...)``.
        num_iterations: number of optimizer steps to take.
        name: optimizer name inserted into the plot title.

    The path is drawn on top of a new contour plot created by
    ``plot_contour()``. Nothing is returned.
    """
    fig, ax = plot_contour()
    xxx = tf.Variable(-5.0)
    yyy = tf.Variable(-2.0)
    # Loss closed over the two variables; gradient: (2*X_COEF*xxx, 2*Y_COEF*yyy).
    loss = lambda: X_COEF*(xxx)**2 + Y_COEF*(yyy)**2

    xx = []
    yy = []
    for _ in range(num_iterations):
        # The value returned by minimize() is not needed; calling .numpy() on
        # it is unnecessary and would fail if minimize() returns None.
        opt.minimize(loss, var_list=[xxx, yyy])
        xx.append(xxx.numpy())
        yy.append(yyy.numpy())

    ax.plot(xx, yy, "o-")
    ax.set_xlabel('Bias')
    ax.set_ylabel('Weight')
    ax.set_title('Optimization using {name} - Contour Plot'.format(name=name))


# SGD
num_iterations = 100
opt = tf.keras.optimizers.SGD(learning_rate=alpha)
do_optimize(opt, num_iterations, name = "SGD")

# Momentum
opt = tf.keras.optimizers.SGD(learning_rate=alpha, momentum=0.9)
do_optimize(opt, num_iterations, name = "Momentum")

# ADAM
num_iterations = 150

opt = tf.keras.optimizers.Adam(
    learning_rate=alpha, beta_1=0.9, beta_2=0.999, epsilon=1e-07, amsgrad=False,
    name='Adam'
)

# do_optimize() returns nothing, so its result is no longer assigned back to
# `opt` (which previously left `opt` bound to None).
do_optimize(opt, num_iterations, name = "Adam")


# ADAMAX

opt = tf.keras.optimizers.Adamax(
    learning_rate=alpha, beta_1=0.9, beta_2=0.999, epsilon=1e-07,
    name='Adamax'
)

do_optimize(opt, num_iterations, name = "Adamax")


# Adagrad
num_iterations=600
opt = tf.keras.optimizers.Adagrad(
    learning_rate=alpha, initial_accumulator_value=0.1, epsilon=1e-07,
    name='Adagrad'
)
do_optimize(opt, num_iterations, name = "Adagrad")











# Optimizers with contours - Utility functions

In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

import numpy as np

import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib import animation, rc
%matplotlib widget
%matplotlib notebook


# Module-level settings shared with the cells that are run after this one.
# Coefficients for a quadratic loss of the form X_COEF*x**2 + Y_COEF*y**2.
X_COEF=1
Y_COEF=40
# Learning rate handed to the optimizers.
alpha = 0.02517
# NOTE(review): x_offset / y_offset are not used by the functions defined in this cell.
x_offset = 2
y_offset = 1.5



def plot_contour(loss_with_args, title):
    """Draw labelled black contour lines of a two-argument loss on a new figure.

    ``loss_with_args`` is called once with the two meshgrid arrays covering
    [-5.5, 5.5] on both axes; ``title`` becomes the axes title. Tick labels
    are hidden. Returns the ``(fig, ax)`` pair that was created.
    """
    axis_samples = np.linspace(-5.5, 5.5, 100)
    grid_x, grid_y = np.meshgrid(axis_samples, axis_samples)
    # 0.1, 1, 2 followed by the squares 2**2 .. 17**2 (4 .. 289).
    levels = [0.1, 1, 2] + [k * k for k in range(2, 18)]
    heights = loss_with_args(grid_x, grid_y)

    fig, ax = plt.subplots()
    ax.contour(grid_x, grid_y, heights, levels, colors='black')

    # Axis decoration: no tick labels, fixed axis names, caller-supplied title.
    ax.set_yticklabels([])
    ax.set_xticklabels([])
    ax.set_xlabel('w1')
    ax.set_ylabel('b')
    ax.set_title(title)
    return fig, ax

def init_animate_optimizer():
    """Reset the module-level animation state to its starting values.

    Rebinds the globals ``xxx``/``yyy`` to new variables at (-5, -2) and
    ``xx``/``yy`` to empty lists, then returns a one-element tuple holding
    the module-level artist ``p1``.
    """
    global xxx, yyy, xx, yy

    xx, yy = [], []
    xxx = tf.Variable(-5.0)
    yyy = tf.Variable(-2.0)
    return (p1,)



# Module-level lists of visited coordinates, appended to by animate_optimizer().
xx = []
yy = []
def animate_optimizer(frame, opt, loss_func):
    """Advance the optimizer by one step and refresh the contour animation.

    Operates on module-level state: the variables ``xxx``/``yyy``, the path
    lists ``xx``/``yy`` and the artists ``p2`` (path) and ``p3`` (current
    point). None of these names is rebound here, so no ``global``
    declarations are required.

    Args:
        frame: frame index supplied by FuncAnimation (not used here).
        opt: optimizer exposing ``minimize(loss, var_list=...)``.
        loss_func: zero-argument callable minimised w.r.t. ``xxx`` and ``yyy``.

    Returns:
        ``(p2, p3)`` -- the artists modified in this frame.
    """
    # The value returned by minimize() is not needed here.
    opt.minimize(loss_func, var_list=[xxx, yyy])

    # Record the new point first so the drawn path includes the current position.
    xx.append(xxx.numpy())
    yy.append(yyy.numpy())
    p2.set_data(xx, yy)
    # Pass one-element sequences to set_data rather than bare scalars.
    p3.set_data([xx[-1]], [yy[-1]])
    # Both artists were modified, so both are returned for blitting.
    return p2, p3







# Run optimizations - static plots with loss_diff_in_gradients - first execute  box above

In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

import numpy as np

import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib import animation, rc
%matplotlib widget
%matplotlib notebook

def do_optimize(loss_with_args, opt, num_iterations, name):
    """Plot the path ``opt`` takes on two losses, each on its own contour plot.

    The first run minimises ``loss_with_args``; the second minimises the
    fixed loss ``x**4 - 10*x**2 - 3*x + 40*y**2``. Both start at (-5, -2).

    Args:
        loss_with_args: two-argument loss. It is evaluated on numpy meshgrid
            arrays for the contours and on the two variables for the descent,
            so the drawn contours always match the optimised loss.
        opt: optimizer exposing ``minimize(loss, var_list=...)``.
        num_iterations: number of optimizer steps per run.
        name: optimizer name used in the plot titles.

    Nothing is returned.
    """

    def descend_and_plot(loss_fn):
        """Contour-plot ``loss_fn`` and overlay the descent path from (-5, -2)."""
        fig, ax = plot_contour(loss_fn, name)

        xxx = tf.Variable(-5.0)
        yyy = tf.Variable(-2.0)
        loss_func = lambda: loss_fn(xxx, yyy)

        xx = []
        yy = []
        for _ in range(num_iterations):
            opt.minimize(loss_func, var_list=[xxx, yyy])
            xx.append(xxx.numpy())
            yy.append(yyy.numpy())

        ax.plot(xx, yy, "o-")
        ax.set_xlabel('Bias')
        ax.set_ylabel('Weight')
        ax.set_title('Optimization using {name} - Contour Plot'.format(name=name))

    descend_and_plot(loss_with_args)

    ####  Another loss func ###############################
    # NOTE(review): the same optimizer instance is reused for this second run,
    # so any internal optimizer state carries over -- confirm this is intended.
    descend_and_plot(lambda x, y: (x)**4 - 10 * (x)** 2 - 3 * (x) + 40*(y)**2)


def run_optimization_static_contour_graph(loss_func_with_args):
    """Run do_optimize() once for each of SGD, momentum, Adam and Adamax.

    Args:
        loss_func_with_args: two-argument loss forwarded to ``do_optimize``.

    All optimizers use the module-level learning rate ``alpha``. The name of
    each optimizer is printed before its run. Nothing is returned.
    """
    optimizers_list = [
        {'name': 'sgd', 'opt': tf.keras.optimizers.SGD(learning_rate=alpha), 'max_iter': 100},
        {'name': 'momentum', 'opt': tf.keras.optimizers.SGD(learning_rate=alpha, momentum=0.9), 'max_iter': 100},
        {'name': 'adam', 'opt': tf.keras.optimizers.Adam(
            learning_rate=alpha, beta_1=0.9, beta_2=0.999, epsilon=1e-07, amsgrad=False,
            name='Adam'), 'max_iter': 450},
        {'name': 'adamax', 'opt': tf.keras.optimizers.Adamax(
            learning_rate=alpha, beta_1=0.9, beta_2=0.999, epsilon=1e-07,
            name='Adamax'), 'max_iter': 450},
    ]

    for optimizer in optimizers_list:
        print(optimizer['name'])
        # Forward this function's own argument; previously an unrelated
        # module-level `loss_with_args` was passed instead. The unused marker
        # artists that were drawn on an `ax` not defined in this function
        # have been removed; do_optimize() creates its own figures.
        do_optimize(loss_with_args=loss_func_with_args, opt=optimizer['opt'],
                    num_iterations=optimizer['max_iter'], name=optimizer['name'])


# Module-level starting point and zero-argument loss closed over it.
# NOTE(review): the call at the bottom of this cell only passes `loss_func_with_args`;
# `xxx`, `yyy` and `loss_func` are not passed to it.
xxx = tf.Variable(-5.0)
yyy = tf.Variable(-2.0)
loss_func = lambda: 1*(xxx)**2 + 40*(yyy)**2

# loss_func = lambda: 1*(xxx)**2 + 40*(yyy)**2
# Two-argument form of the same quadratic loss.
loss_func_with_args = lambda xxx, yyy: 1*(xxx)**2 + 40*(yyy)**2




# Run every configured optimizer and draw the static contour plots.
run_optimization_static_contour_graph(loss_func_with_args=loss_func_with_args)
# out_file_name_prefix = 'loss_diff_gradients_2d_contour_'
                                                      


# Scratch static contour with saddle point - fix it

In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

import numpy as np

import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib import animation, rc
%matplotlib widget
%matplotlib notebook

def plot_contour_limitted(loss_with_args, title):
    """Contour-plot a two-argument loss over x in [1, 8] and y in [-5.5, 5.5].

    ``loss_with_args`` is called once with the two meshgrid arrays;
    ``title`` becomes the axes title. Tick labels are hidden. Returns the
    ``(fig, ax)`` pair that was created.
    """
    x_samples = np.linspace(1, 8, 100)
    y_samples = np.linspace(-5.5, 5.5, 100)
    grid_x, grid_y = np.meshgrid(x_samples, y_samples)
    # 0.1, 1, 2 followed by the squares 2**2 .. 17**2 (4 .. 289).
    levels = [0.1, 1, 2] + [k * k for k in range(2, 18)]
    heights = loss_with_args(grid_x, grid_y)

    fig, ax = plt.subplots()
    ax.contour(grid_x, grid_y, heights, levels, colors='black')

    # Axis decoration: no tick labels, fixed axis names, caller-supplied title.
    ax.set_yticklabels([])
    ax.set_xticklabels([])
    ax.set_xlabel('w1')
    ax.set_ylabel('b')
    ax.set_title(title)
    return fig, ax

def do_optimize(loss_with_args, opt, num_iterations, name):
    """Plot the path ``opt`` takes on two fixed losses, each on its own contours.

    Run 1 draws the contours of ``loss_with_args`` with ``plot_contour`` and
    minimises ``x**2 - 40*y**2`` starting from (-5, -2).
    Run 2 draws the contours of ``4*cos(x-1) + cos(2*pi*x)/x + 40*y**2`` with
    ``plot_contour_limitted`` and minimises that same loss from (1, -2).

    NOTE(review): the loss minimised in run 1 is hard-coded and is not derived
    from ``loss_with_args`` -- confirm the caller passes the matching function.

    ``num_iterations`` optimizer steps are taken per run and ``name`` is used
    in the plot titles. Nothing is returned.
    """

    def trace_path(ax, objective, var_x, var_y):
        """Step ``opt`` on ``objective`` and draw the visited points on ``ax``."""
        path_x, path_y = [], []
        for _ in range(num_iterations):
            opt.minimize(objective, var_list=[var_x, var_y])
            path_x.append(var_x.numpy())
            path_y.append(var_y.numpy())

        ax.plot(path_x, path_y, "o-")
        ax.set_xlabel('Bias')
        ax.set_ylabel('Weight')
        ax.set_title('Optimization using {name} - Contour Plot'.format(name=name))

    # Run 1: contours of the caller's loss, descent on the hard-coded loss.
    fig, ax = plot_contour(loss_with_args, name)
    x1 = tf.Variable(-5.0)
    y1 = tf.Variable(-2.0)
    trace_path(ax, lambda: 1*(x1)**2 - 40*(y1)**2, x1, y1)

    # Run 2: another loss function, plotted over the limited x range.
    fig, ax = plot_contour_limitted(lambda x, y:  4.0*tf.cos(x-1)+tf.divide(tf.cos(2.0*np.pi*x),x)+ 40*(y)**2 , name)
    x2 = tf.Variable(1.0)
    y2 = tf.Variable(-2.0)
    trace_path(ax, lambda: 4.0*tf.cos(x2-1)+tf.divide(tf.cos(2.0*np.pi*x2),x2) + 40*y2**2, x2, y2)


def run_optimization_static_contour_graph(loss_func_with_args):
    """Run do_optimize() once for each of SGD, momentum, Adam and Adamax.

    Args:
        loss_func_with_args: two-argument loss forwarded to ``do_optimize``.

    The learning rate is fixed locally and printed, and the name of each
    optimizer is printed before its run. Nothing is returned.
    """
    # Effective learning rate. The earlier candidates 0.0067 and 0.02517 were
    # assigned and immediately overwritten, so only this value was ever used.
    alpha = 0.001

    print(alpha)
    optimizers_list = [
        {'name': 'sgd', 'opt': tf.keras.optimizers.SGD(learning_rate=alpha), 'max_iter': 100},
        {'name': 'momentum', 'opt': tf.keras.optimizers.SGD(learning_rate=alpha, momentum=0.9), 'max_iter': 300},
        {'name': 'adam', 'opt': tf.keras.optimizers.Adam(
            learning_rate=alpha, beta_1=0.9, beta_2=0.999, epsilon=1e-07, amsgrad=False,
            name='Adam'), 'max_iter': 650},
        {'name': 'adamax', 'opt': tf.keras.optimizers.Adamax(
            learning_rate=alpha, beta_1=0.9, beta_2=0.999, epsilon=1e-07,
            name='Adamax'), 'max_iter': 650},
    ]

    for optimizer in optimizers_list:
        print(optimizer['name'])
        # Forward this function's own argument; previously an unrelated
        # module-level `loss_with_args` was passed instead. The unused marker
        # artists that were drawn on an `ax` not defined in this function
        # have been removed; do_optimize() creates its own figures.
        do_optimize(loss_with_args=loss_func_with_args, opt=optimizer['opt'],
                    num_iterations=optimizer['max_iter'], name=optimizer['name'])


# Module-level starting point and zero-argument loss closed over it.
# NOTE(review): this `loss_func` uses "+ 40*y**2" while `loss_func_with_args` below uses
# "- 40*y**2"; `loss_func` is not passed to the call at the bottom of this cell.
xxx = tf.Variable(1.0)
yyy = tf.Variable(-2.0)
loss_func = lambda: 1*(xxx)**2 + 40*(yyy)**2

# loss_func = lambda: 1*(xxx)**2 + 40*(yyy)**2
# Two-argument saddle-shaped loss: x**2 - 40*y**2.
loss_func_with_args = lambda xxx, yyy: 1*(xxx)**2 - 40*(yyy)**2




# Run every configured optimizer and draw the static contour plots.
run_optimization_static_contour_graph(loss_func_with_args=loss_func_with_args)
# out_file_name_prefix = 'loss_diff_gradients_2d_contour_'
                                                      


# scratch 2:


In [None]:
# # #####3D:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np


from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm
import matplotlib.animation as animation
from matplotlib import animation, rc

%matplotlib widget
%matplotlib notebook

# Scratch settings. Several names are assigned more than once; only the last
# assignment of each takes effect: max_iter = 150, alpha = 0.0067, offsets = 0.
max_iter=100

# Coefficients used by f2(), grad2() and cost_func() below.
X_COEF=1 #20
Y_COEF=40
alpha = 0.02517

x_offset = 0
y_offset = 0
max_iter = 70

# Final values (override the assignments above).
alpha = 0.0067
x_offset = 0
y_offset = 0
max_iter= 150




def f2(x):
    """Return the shifted quadratic X_COEF*(x0+x_offset)**2 + Y_COEF*(x1+y_offset)**2.

    ``x`` is indexed as ``x[0]`` and ``x[1]``; the coefficients and offsets
    are module-level values.
    """
    shifted_x = x[0] + x_offset
    shifted_y = x[1] + y_offset
    return X_COEF * shifted_x**2 + Y_COEF * shifted_y**2

def grad2(x):
    """Return a two-component gradient as a numpy array.

    The first component, ``4*x0**3 - 20*x0 - 3``, is the derivative of
    ``x0**4 - 10*x0**2 - 3*x0``; the second, ``2*Y_COEF*(x1 + y_offset)``, is
    the derivative of ``Y_COEF*(x1 + y_offset)**2``.

    NOTE(review): the first component is neither the gradient of f2() nor of
    cost_func() (which uses ``20*x**2``) and ignores ``x_offset`` -- confirm
    which loss this is meant to pair with.
    """
    d_first = 4 * x[0]**3 - 20 * x[0] - 3
    d_second = 2*Y_COEF*(x[1]+y_offset)
    return np.array([d_first, d_second])




# Running exponential average of the gradient, shared across calls.
v = 0

def gd2_momentum_1(x, frame, alpha, grad=grad2, beta=0.9):
    """Take one gradient step using a bias-corrected moving average of the gradient.

    Args:
        x: current point, passed to ``grad``.
        frame: zero-based step index; ``frame + 1`` is used as the step count
            in the bias correction.
        alpha: step size.
        grad: callable returning the gradient at ``x``.
        beta: decay rate of the moving average.

    Returns:
        The updated point ``x - alpha * v_corrected``.

    Side effect: updates the module-level moving average ``v``.
    """
    global v
    v = beta*v + (1-beta)*grad(x)
    # Bias correction for an average that starts at zero: divide by
    # 1 - beta**t (the previous "1 + beta**t" shrank the step instead of
    # compensating for the zero initialisation).
    vc = v/(1-beta**(frame+1))
    x = x - alpha * vc
    return x



# Render animations as HTML5 video.
rc('animation', html='html5')
# Newer Matplotlib releases ship the seaborn style sheets under a
# 'seaborn-v0_8-' prefix; use whichever name this installation provides.
_whitegrid_style = 'seaborn-v0_8-whitegrid' if 'seaborn-v0_8-whitegrid' in plt.style.available else 'seaborn-whitegrid'
plt.style.use(_whitegrid_style)

def cost_func(x, y):
    """Return sx**4 - 20*sx**2 - 3*sx + Y_COEF*sy**2 for the shifted inputs.

    ``sx = x + x_offset`` and ``sy = y + y_offset``; the offsets and
    ``Y_COEF`` are module-level values.
    """
    sx = x + x_offset
    sy = y + y_offset
    return sx**4 - 20 * sx**2 - 3 * sx + Y_COEF*sy**2

def plot_loss_func_3d():
    """Show a semi-transparent 3D surface of ``cost_func`` and return it.

    The surface is sampled on a 50x50 grid spanning 11 units on each axis,
    shifted by the module-level ``x_offset`` / ``y_offset``.

    Returns:
        The ``(fig, ax)`` pair that was created; ``ax`` is a 3D axes.
    """
    # Each axis is shifted by its own offset (previously the bounds mixed
    # x_offset and y_offset on both axes).
    xx = np.linspace(-5.5-x_offset, 5.5-x_offset, 50)
    yy = np.linspace(-5.5-y_offset, 5.5-y_offset, 50)
    X, Y = np.meshgrid(xx, yy)
    Z = cost_func(X, Y)
    fig1 = plt.figure(figsize=(16, 6))
    # Create the 3D axes with add_subplot: passing `projection` to
    # Figure.gca() is not accepted by newer Matplotlib releases.
    ax1 = fig1.add_subplot(111, projection='3d')

    ax1.plot_surface(X, Y, Z, rstride=1, cstride=1, alpha=0.5,
                     linewidth=0, antialiased=False)

    ax1.set_xlabel('b', fontsize=20)
    ax1.set_ylabel('w1', fontsize=20)
    ax1.set_zlabel('J(b, w)', fontsize=20)
    # Hide the tick labels on all three axes.
    ax1.xaxis.set_ticklabels([])
    ax1.yaxis.set_ticklabels([])
    ax1.zaxis.set_ticklabels([])
    plt.show()
    return fig1, ax1

# plot_loss_func_3d()
# Scratch: plot a 1D loss over 100 samples of [0, 5].
x = np.linspace(0,5,100)
# This first candidate is replaced by the assignment on the next line and is never evaluated.
loss = lambda x: 2*x **4 - 20 * x** 2# - 3 * x
# NOTE(review): the first sample is x = 0, so the divide term divides by zero there.
# NOTE(review): `tf` is not imported in this cell -- presumably it comes from an earlier cell.
loss = lambda x: 4.0*tf.cos(x-1)+tf.divide(tf.cos(2.0*np.pi*x),x)
y = loss(x)
plt.plot(x, y)

# Run optimizations - Contour Animation with loss_diff_in_gradients - first execute  box above

In [None]:
# Quadratic loss whose two gradient components differ strongly in scale.
loss_diff_in_gradients = lambda: 1*(xxx)**2 + 40*(yyy)**2         # gradient: (2*xxx, 80*yyy)
loss_func = loss_diff_in_gradients
loss_with_args = lambda xxx, yyy: X_COEF*(xxx)**2 + Y_COEF*(yyy)**2         # gradient: (2*X_COEF*xxx, 2*Y_COEF*yyy)
out_file_name_prefix = 'loss_diff_gradients_2d_contour_'

# One entry per animation. 'name' is used for the plot title and the output
# file name, so every entry needs a distinct name: the Adamax entry was
# previously also called 'adam', which made its GIF overwrite Adam's.
optimizers_list = [
    {'name': 'sgd', 'opt': tf.keras.optimizers.SGD(learning_rate=alpha), 'max_iter': 100},
    {'name': 'momentum', 'opt': tf.keras.optimizers.SGD(learning_rate=alpha, momentum=0.9), 'max_iter': 100},
    {'name': 'adam', 'opt': tf.keras.optimizers.Adam(
        learning_rate=alpha, beta_1=0.9, beta_2=0.999, epsilon=1e-07, amsgrad=False,
        name='Adam'), 'max_iter': 200},
    {'name': 'adamax', 'opt': tf.keras.optimizers.Adamax(
        learning_rate=alpha, beta_1=0.9, beta_2=0.999, epsilon=1e-07,
        name='Adamax'), 'max_iter': 200},
]

for optimizer in optimizers_list:
    print(optimizer['name'])
    opt = optimizer['opt']
    # (The former bare `plt.clf` was never called and did nothing; dropped.)
    fig1, ax = plot_contour(loss_with_args=loss_with_args, title=optimizer['name'])
    # p1 marks the start; p2 and p3 are the module-level artists that
    # animate_optimizer() updates.
    p1, = ax.plot([tf.Variable(-5.0)], [tf.Variable(-2.0)], 'kx')
    p2, = ax.plot([], [], color='red', alpha=0.6)
    p3, = ax.plot([], [], 'r.')
    max_iter = optimizer['max_iter']
    anim2 = animation.FuncAnimation(fig1, animate_optimizer, init_func=init_animate_optimizer, frames=range(0, max_iter), fargs=(opt, loss_func,), blit=True, interval=50, repeat=True, repeat_delay=20)
    filename = optimizer['name']+'.gif'
    anim2.save('/home/ronen/Downloads/'+out_file_name_prefix+filename, dpi=80, writer='imagemagick', fps=5)
    # Reset the shared state before the next optimizer runs.
    init_animate_optimizer()

# Run optimizations - Contour animation with loss local minima - first execute  box above

In [None]:
x_offset=0
y_offset=0
# Loss with a local minimum along the first variable. It is written in terms
# of the optimised variables xxx / yyy; it previously read the unrelated
# module-level names `x` and `y`.
loss_local_min = lambda:  (xxx +x_offset)**4 - 10 * (xxx +x_offset)** 2 - 3 * (xxx +x_offset)+ Y_COEF*(yyy+y_offset)**2
loss_func = loss_local_min
out_file_name_prefix = 'loss_local_minima_2d_contour_'
# Two-argument form of the same loss, used for the contours; it now uses its
# own arguments instead of ignoring them.
loss_with_args = lambda xxx, yyy: (xxx +x_offset)**4 - 10 * (xxx +x_offset)** 2 - 3 * (xxx +x_offset)+ Y_COEF*(yyy+y_offset)**2


# One entry per animation. Stray quote characters after each optimizer
# constructor (syntax errors) have been removed, and the Adamax entry is now
# named 'adamax' so its GIF no longer overwrites Adam's.
optimizers_list = [
    {'name': 'sgd', 'opt': tf.keras.optimizers.SGD(learning_rate=alpha), 'max_iter': 100},
    {'name': 'momentum', 'opt': tf.keras.optimizers.SGD(learning_rate=alpha, momentum=0.9), 'max_iter': 100},
    {'name': 'adam', 'opt': tf.keras.optimizers.Adam(
        learning_rate=alpha, beta_1=0.9, beta_2=0.999, epsilon=1e-07, amsgrad=False,
        name='Adam'), 'max_iter': 200},
    {'name': 'adamax', 'opt': tf.keras.optimizers.Adamax(
        learning_rate=alpha, beta_1=0.9, beta_2=0.999, epsilon=1e-07,
        name='Adamax'), 'max_iter': 200},
]

for optimizer in optimizers_list:
    print(optimizer['name'])
    opt = optimizer['opt']
    # (The former bare `plt.clf` was never called and did nothing; dropped.)
    fig1, ax = plot_contour(loss_with_args=loss_with_args, title=optimizer['name'])
    # p1 marks the start; p2 and p3 are the module-level artists that
    # animate_optimizer() updates.
    p1, = ax.plot([tf.Variable(-5.0)], [tf.Variable(-2.0)], 'kx')
    p2, = ax.plot([], [], color='red', alpha=0.6)
    p3, = ax.plot([], [], 'r.')
    max_iter = optimizer['max_iter']
    anim2 = animation.FuncAnimation(fig1, animate_optimizer, init_func=init_animate_optimizer, frames=range(0, max_iter), fargs=(opt, loss_func,), blit=True, interval=50, repeat=True, repeat_delay=20)
    filename = optimizer['name']+'.gif'
    anim2.save('/home/ronen/Downloads/'+out_file_name_prefix+filename, dpi=80, writer='imagemagick', fps=5)
    # Reset the shared state before the next optimizer runs.
    init_animate_optimizer()

# Demo multi plot animation

In [None]:
import matplotlib
matplotlib.use('Qt5Agg') #use Qt5 as backend, comment this line for default backend

from matplotlib import pyplot as plt
from matplotlib import animation

# Figure and axes with fixed limits for the demo.
fig = plt.figure()

ax = plt.axes(xlim=(0, 2), ylim=(0, 100))

N = 4
# N empty line artists to animate; each plt.plot call yields a one-element list.
lines = [artist for _ in range(N) for artist in plt.plot([], [])]

# Three bars whose heights are animated.
rectangles = plt.bar([0.5,1,1.5],[50,40,90],width=0.1)

# Every artist that the animation callbacks update: the lines, then the bars.
patches = [*lines, *rectangles]

def init():
    """Blank every animated artist and return the full artist list.

    Clears the data of each line in the module-level ``lines``, sets the
    height of each bar in ``rectangles`` to 0 and returns ``patches``.
    """
    for line_artist in lines:
        line_artist.set_data([], [])

    for bar in rectangles:
        bar.set_height(0)

    # Everything that must be redrawn.
    return patches

def animate(i):
    """Update all lines and bars for frame ``i`` and return the artist list.

    Line number ``k`` (zero-based) is drawn from (0, 10*k) to (2, i); bar
    number ``k`` gets height ``i / (k + 1)``.
    """
    for index, line_artist in enumerate(lines):
        line_artist.set_data([0, 2], [10 * index, i])

    # Counting from 1 gives the divisor k + 1 directly.
    for divisor, bar in enumerate(rectangles, start=1):
        bar.set_height(i / divisor)

    # Everything that must be redrawn.
    return patches

# Build the animation: 100 frames, 20 ms apart, with blitting enabled.
# The result is kept in `anim` for the lifetime of the script.
anim = animation.FuncAnimation(fig, animate, init_func=init,
                               frames=100, interval=20, blit=True)

# Open the figure window.
plt.show()