In [None]:
# The cells below use the TensorFlow 1.x graph API (tf.Session, tf.placeholder).
# If the printed version is not a 1.x release, uncomment the two lines below to
# pin 1.13.1 (presumably the version this notebook was written against).
# !pip uninstall tensorflow
# !pip install tensorflow==1.13.1
import tensorflow as tf
print(tf.__version__)

In [None]:
import os
import csv
import logging
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
# import tensorflow as tf
from datetime import datetime

In [None]:
# MNIST dataset: the Keras loader returns a (train, test) pair, each of which
# is an (images, labels) tuple.
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

In [None]:
# Flatten every image into a single row of 28 * 28 pixels and divide by 255
# (MNIST pixels are stored as 0-255 integers). Using -1 lets NumPy infer the
# number of samples instead of hard-coding 60000 / 10000.
x_train = x_train.reshape(-1, 28 * 28) / 255
x_test = x_test.reshape(-1, 28 * 28) / 255

# One-hot encode the integer labels by selecting rows of a 10x10 identity
# matrix. This yields the same float32 (n_samples, 10) arrays as
# tf.one_hot(labels, 10) but needs no throwaway session and leaves no ops
# behind in the default graph.
y_train = np.eye(10, dtype=np.float32)[y_train]
y_test = np.eye(10, dtype=np.float32)[y_test]

In [None]:
from tensorflow.python.ops import control_flow_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import state_ops
from tensorflow.python.framework import ops
from tensorflow.python.training import optimizer
#from tensorflow.python.eager import context
from tensorflow.python.ops import resource_variable_ops
from tensorflow.python.ops import variable_scope
from tensorflow.python.training import training_ops
class AdamOptimizer(optimizer.Optimizer):
    """Dense-only Adam optimizer implemented on the TF 1.x ``Optimizer`` base class.

    For every variable two slots are kept: ``m1`` (exponential moving average
    of the gradient) and ``v1`` (exponential moving average of the squared
    gradient). The update applied to a variable is::

        alpha = sqrt(1 - beta2_power) / (1 - beta1_power)
        var  -= lr * alpha * m / (sqrt(v) + epsilon)

    where ``beta1_power`` / ``beta2_power`` start at ``beta1`` / ``beta2`` and
    are multiplied by ``beta1`` / ``beta2`` after every step (see ``_finish``),
    so the bias correction tracks the number of steps taken.

    Args:
        learning_rate: Step size; a Python number or a tensor.
        beta1: Decay rate for the first-moment estimate.
        beta2: Decay rate for the second-moment estimate.
        use_locking: If True, use locks for the update operations.
        epsilon: Small constant added to the denominator for numerical stability.
        name: Name used for the ops and slots created by this optimizer.
    """

    def __init__(self, learning_rate=0.001, beta1=0.9, beta2=0.999, use_locking=False, epsilon=1e-8, name="Adam"):
        super(AdamOptimizer, self).__init__(use_locking, name)
        self._lr = learning_rate
        self._beta1 = beta1
        self._beta2 = beta2
        self._epsilon = epsilon

        # Tensor versions of the constructor arguments, created in _prepare().
        self._lr_t = None
        self._beta1_t = None
        self._beta2_t = None
        self._epsilon_t = None

        # Running products beta1**t and beta2**t, created in _create_slots().
        self._beta1_power = None
        self._beta2_power = None

    def _prepare(self):
        """Convert the hyper-parameters to tensors before any update op is built."""
        self._lr_t = ops.convert_to_tensor(self._lr, name="learning_rate")
        self._beta1_t = ops.convert_to_tensor(self._beta1, name="beta1")
        self._beta2_t = ops.convert_to_tensor(self._beta2, name="beta2")
        self._epsilon_t = ops.convert_to_tensor(self._epsilon, name="epsilon")

    def _create_slots(self, var_list):
        """Create the beta-power accumulators and the per-variable moment slots."""
        # Place the shared accumulators next to the variable whose name sorts
        # first so that their placement is deterministic.
        first_var = min(var_list, key=lambda x: x.name)
        with ops.colocate_with(first_var):
            self._beta1_power = variable_scope.variable(self._beta1, name="beta1_power", trainable=False)
            self._beta2_power = variable_scope.variable(self._beta2, name="beta2_power", trainable=False)
        # Create slots for the first and second moments.
        for v in var_list:
            self._zeros_slot(v, "m1", self._name)
            self._zeros_slot(v, "v1", self._name)

    def _apply_dense(self, grad, var):
        """Build the op that applies one Adam step to ``var`` given a dense ``grad``."""
        dtype = var.dtype.base_dtype
        lr_t = math_ops.cast(self._lr_t, dtype)
        beta1_power = math_ops.cast(self._beta1_power, dtype)
        beta2_power = math_ops.cast(self._beta2_power, dtype)
        beta1_t = math_ops.cast(self._beta1_t, dtype)
        beta2_t = math_ops.cast(self._beta2_t, dtype)
        # Use the epsilon passed to the constructor rather than a literal.
        epsilon_t = math_ops.cast(self._epsilon_t, dtype)

        # Second moment: v <- beta2 * v + (1 - beta2) * grad^2
        v = self.get_slot(var, "v1")
        v_t = v.assign(beta2_t * v + (1. - beta2_t) * math_ops.square(grad),
                       use_locking=self._use_locking)
        # First moment: m <- beta1 * m + (1 - beta1) * grad
        m = self.get_slot(var, "m1")
        m_t = m.assign(beta1_t * m + (1. - beta1_t) * grad,
                       use_locking=self._use_locking)

        # Bias correction folded into a single scalar factor.
        alpha_t = math_ops.sqrt(1. - beta2_power) / (1. - beta1_power)
        g_t = (m_t * alpha_t) / (math_ops.sqrt(v_t) + epsilon_t)
        var_update = state_ops.assign_sub(var, lr_t * g_t, use_locking=self._use_locking)
        return control_flow_ops.group(*[var_update, v_t, m_t])

    def _apply_sparse(self, grad, var):
        """Sparse gradients are not handled by this implementation."""
        raise NotImplementedError("Sparse gradient updates are not supported.")

    def _finish(self, update_ops, name_scope):
        """Advance beta1_power / beta2_power once all variable updates are done.

        Without this step the accumulators would keep their initial values and
        the bias correction in ``_apply_dense`` would never change.
        """
        # The control dependency guarantees every variable update of this step
        # reads the old powers before they are multiplied.
        with ops.control_dependencies(update_ops):
            with ops.colocate_with(self._beta1_power):
                update_beta1 = self._beta1_power.assign(
                    self._beta1_power * self._beta1_t, use_locking=self._use_locking)
                update_beta2 = self._beta2_power.assign(
                    self._beta2_power * self._beta2_t, use_locking=self._use_locking)
        return control_flow_ops.group(*update_ops + [update_beta1, update_beta2], name=name_scope)

In [None]:
# Names of the optimizers to compare; each one must match a branch handled by
# optimizer_fn, which raises NotImplementedError for anything else.
optimizer_options = ['SGDNesterov', 'Adagrad', 'RMSProp', 'AdaDelta', 'Adam']
# Not referenced by the visible cells — presumably a switch for a dropout
# experiment elsewhere in the notebook (TODO confirm or remove).
dropout_options = [False]

In [None]:
def optimizer_fn(optimizer, lr, loss, name='Optimizer'):
    """Build the training op for the optimizer selected by name.

    A float step counter starting at 1 is created inside the variable scope
    ``name`` and handed to ``minimize`` as its ``global_step``; the learning
    rate actually used is ``lr / sqrt(step)``.

    Args:
        optimizer: One of 'SGDNesterov', 'Adagrad', 'RMSProp', 'AdaDelta', 'Adam'.
        lr: Base learning rate.
        loss: Scalar loss tensor to minimize.
        name: Variable scope under which the ops are created.

    Returns:
        A ``(train_op, current_learning_rate)`` tuple.

    Raises:
        NotImplementedError: If ``optimizer`` is not one of the known names.
    """
    with tf.variable_scope(name):
        step_counter = tf.Variable(1, dtype=tf.float32, trainable=False)
        decayed_lr = lr / tf.math.sqrt(x=step_counter)

        # (label, factory) pairs; factories are lazy so that only the selected
        # optimizer is ever instantiated.
        factories = (
            ('SGDNesterov', lambda: tf.train.MomentumOptimizer(learning_rate=decayed_lr,
                                                               momentum=0.99,
                                                               name='SGDNesterov',
                                                               use_nesterov=True)),
            ('Adagrad', lambda: tf.train.AdagradOptimizer(learning_rate=decayed_lr)),
            ('RMSProp', lambda: tf.train.RMSPropOptimizer(learning_rate=decayed_lr)),
            ('AdaDelta', lambda: tf.train.AdadeltaOptimizer(learning_rate=decayed_lr)),
            ('Adam', lambda: AdamOptimizer(learning_rate=decayed_lr)),
        )

        for label, make_optimizer in factories:
            if optimizer == label:
                train_op = make_optimizer().minimize(loss, global_step=step_counter)
                return train_op, decayed_lr

        raise NotImplementedError(" [*] Optimizer is not included in list!")

# Training configuration shared by every optimizer run.
learning_rate = 0.01
epochs = 20
batch_size = 100
batches = int(x_train.shape[0] / batch_size)

# Train the same softmax-regression model once per optimizer and report the
# test accuracy. The loop variable is deliberately NOT called `optimizer`:
# that name is the tensorflow.python.training.optimizer module that
# AdamOptimizer subclasses from, and rebinding it breaks re-running that cell.
for optimizer_name in optimizer_options:
    print('\nOptimizer: {}\n'.format(optimizer_name))

    # Start each run from an empty graph so variables and optimizer slots from
    # the previous run are neither kept alive nor re-initialised.
    tf.reset_default_graph()

    X = tf.placeholder(tf.float32, [None, 784])
    Y = tf.placeholder(tf.float32, [None, 10])
    W = tf.Variable(0.1 * np.random.randn(784, 10).astype(np.float32))
    B = tf.Variable(0.1 * np.random.randn(10).astype(np.float32))
    logits = tf.add(tf.matmul(X, W), B)
    Y_predicted = tf.nn.softmax(logits)
    # Cross-entropy computed from the logits: mathematically the same as
    # -sum(Y * log(softmax(logits))) but does not produce NaN when a predicted
    # probability underflows to 0.
    loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits_v2(labels=Y, logits=logits))
    correct_pred = tf.equal(tf.argmax(Y_predicted, 1), tf.argmax(Y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

    # optimizer_fn returns (train_op, current_lr); only the op is needed here.
    train_op, _ = optimizer_fn(optimizer_name, learning_rate, loss)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for epoch in range(epochs):
            for i in range(batches):
                # Mini-batch i covers samples [i * batch_size, (i + 1) * batch_size).
                offset = i * batch_size
                feed = {X: x_train[offset: offset + batch_size],
                        Y: y_train[offset: offset + batch_size]}
                sess.run(train_op, feed_dict=feed)
            # Only the last batch's post-update loss is reported, so evaluate it
            # once per epoch instead of after every step.
            c = sess.run(loss, feed_dict=feed)
            print(f'epoch:{epoch:2d} cost={c:.4f}')
        acc = accuracy.eval({X: x_test, Y: y_test})
        print(f'Accuracy: {acc * 100:.2f}%')