In [13]:
import tensorflow as tf
from tensorflow import keras
import numpy as np
import matplotlib.pyplot as plt

In [14]:
# Alias for the Fashion-MNIST dataset module that ships with Keras;
# the next cell calls `load_data()` on it.
data = keras.datasets.fashion_mnist

In [15]:
# Load the dataset as two (images, labels) pairs: one for training, one for testing.
(train_images, train_labels), (test_images, test_labels) = data.load_data()

In [16]:
# Human-readable names for the ten clothing categories.
# NOTE(review): presumably ordered so that index i names integer label i
# (the Fashion-MNIST label order) - confirm before using it to decode predictions.
class_names = ['t_shirt', 'trouser', 'pullover', 'dress', 'coat',
          'sandal', 'shirt', 'sneaker', 'bag', 'ankle_boots']

In [17]:
from tensorflow.python.ops import control_flow_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import state_ops
from tensorflow.python.framework import ops
from tensorflow.python.training import optimizer
#from tensorflow.python.eager import context
from tensorflow.python.ops import resource_variable_ops
from tensorflow.python.ops import variable_scope
from tensorflow.python.training import training_ops

from keras import backend_config
from keras.optimizers.optimizer_v2 import optimizer_v2

class AdamOptimizer(optimizer_v2.OptimizerV2):
    """Adam optimizer implemented on top of the Keras ``OptimizerV2`` base class.

    Every trainable variable gets a first-moment slot ``"m"`` and a
    second-moment slot ``"v"`` (plus ``"vhat"`` when ``amsgrad`` is enabled).
    Dense gradients are applied with the fused ``ResourceApplyAdam`` /
    ``ResourceApplyAdamWithAmsgrad`` raw ops. No sparse update is defined in
    this class.

    Args:
        learning_rate: Step size; stored as the ``"learning_rate"`` hyperparameter.
        beta_1: Decay rate for the first-moment estimate.
        beta_2: Decay rate for the second-moment estimate.
        epsilon: Numerical-stability constant. A falsy value falls back to
            ``backend_config.epsilon()``.
        amsgrad: When true, use the AMSGrad variant of the update.
        name: Optimizer name forwarded to the base class.
        **kwargs: Forwarded to the base class; ``lr`` is honoured as an alias
            for ``learning_rate``.
    """

    def __init__(
        self,
        learning_rate=0.001,
        beta_1=0.9,
        beta_2=0.999,
        epsilon=1e-7,
        amsgrad=False,
        name="Adam",
        **kwargs
    ):
        super().__init__(name, **kwargs)
        self._set_hyper("learning_rate", kwargs.get("lr", learning_rate))
        self._set_hyper("decay", self._initial_decay)
        self._set_hyper("beta_1", beta_1)
        self._set_hyper("beta_2", beta_2)
        self.epsilon = epsilon or backend_config.epsilon()
        self.amsgrad = amsgrad

    def _create_slots(self, var_list):
        """Create the moment accumulators for every variable in ``var_list``."""
        # All "m" slots are created before any "v" slot (and "vhat" last), so
        # the optimizer's weight list is grouped by slot kind.
        for var in var_list:
            self.add_slot(var, "m")
        for var in var_list:
            self.add_slot(var, "v")
        if self.amsgrad:
            for var in var_list:
                self.add_slot(var, "vhat")

    def _prepare_local(self, var_device, var_dtype, apply_state):
        """Cache the per-(device, dtype) coefficients used by the update step."""
        super()._prepare_local(var_device, var_dtype, apply_state)

        local_step = tf.cast(self.iterations + 1, var_dtype)
        beta_1_t = tf.identity(self._get_hyper("beta_1", var_dtype))
        beta_2_t = tf.identity(self._get_hyper("beta_2", var_dtype))
        beta_1_power = tf.pow(beta_1_t, local_step)
        beta_2_power = tf.pow(beta_2_t, local_step)
        # Bias-correction factor sqrt(1 - beta_2^t) / (1 - beta_1^t).
        bias_correction = tf.sqrt(1 - beta_2_power) / (1 - beta_1_power)
        lr = apply_state[(var_device, var_dtype)]["lr_t"] * bias_correction
        apply_state[(var_device, var_dtype)].update(
            dict(
                lr=lr,
                epsilon=tf.convert_to_tensor(self.epsilon, var_dtype),
                beta_1_t=beta_1_t,
                beta_1_power=beta_1_power,
                one_minus_beta_1_t=1 - beta_1_t,
                beta_2_t=beta_2_t,
                beta_2_power=beta_2_power,
                one_minus_beta_2_t=1 - beta_2_t,
            )
        )

    def set_weights(self, weights):
        """Restore optimizer state from ``weights``.

        A list of length ``3 * n + 1`` (iteration counter plus three slots per
        variable, where this optimizer holds ``2 * n + 1`` weights) is
        truncated to the number of weights this optimizer actually has. Any
        other list is handed to the base class unchanged, which validates its
        length.
        """
        params = self.weights
        num_vars = int((len(params) - 1) / 2)
        if len(weights) == 3 * num_vars + 1:
            weights = weights[: len(params)]
        # Previously a list of any other length was replaced by an empty list,
        # so even `opt.set_weights(opt.get_weights())` was rejected.
        super().set_weights(weights)

    def _resource_apply_dense(self, grad, var, apply_state=None):
        """Apply a dense gradient ``grad`` to ``var`` with the fused Adam kernel."""
        var_device, var_dtype = var.device, var.dtype.base_dtype
        coefficients = (apply_state or {}).get(
            (var_device, var_dtype)
        ) or self._fallback_apply_state(var_device, var_dtype)

        m = self.get_slot(var, "m")
        v = self.get_slot(var, "v")

        # The raw ops receive the uncorrected rate `lr_t` together with the
        # beta powers, rather than the pre-corrected `lr` coefficient.
        if not self.amsgrad:
            return tf.raw_ops.ResourceApplyAdam(
                var=var.handle,
                m=m.handle,
                v=v.handle,
                beta1_power=coefficients["beta_1_power"],
                beta2_power=coefficients["beta_2_power"],
                lr=coefficients["lr_t"],
                beta1=coefficients["beta_1_t"],
                beta2=coefficients["beta_2_t"],
                epsilon=coefficients["epsilon"],
                grad=grad,
                use_locking=self._use_locking,
            )

        # amsgrad=True: use the "vhat" slot created in `_create_slots`, which
        # the plain Adam kernel above would otherwise leave untouched.
        vhat = self.get_slot(var, "vhat")
        return tf.raw_ops.ResourceApplyAdamWithAmsgrad(
            var=var.handle,
            m=m.handle,
            v=v.handle,
            vhat=vhat.handle,
            beta1_power=coefficients["beta_1_power"],
            beta2_power=coefficients["beta_2_power"],
            lr=coefficients["lr_t"],
            beta1=coefficients["beta_1_t"],
            beta2=coefficients["beta_2_t"],
            epsilon=coefficients["epsilon"],
            grad=grad,
            use_locking=self._use_locking,
        )

    def get_config(self):
        """Return the base-class config extended with this optimizer's hyperparameters."""
        config = super().get_config()
        config.update(
            {
                "learning_rate": self._serialize_hyperparameter("learning_rate"),
                "decay": self._initial_decay,
                "beta_1": self._serialize_hyperparameter("beta_1"),
                "beta_2": self._serialize_hyperparameter("beta_2"),
                "epsilon": self.epsilon,
                "amsgrad": self.amsgrad,
            }
        )
        return config

In [18]:
def optimizer_fn(optimizer, lr, name='Optimizer'):
    """Build the optimizer selected by ``optimizer``.

    Args:
        optimizer: One of ``'SGDNesterov'``, ``'Adagrad'``, ``'RMSProp'``,
            ``'AdaDelta'`` or ``'Adam'``.
        lr: Learning rate handed to the optimizer.
        name: Unused. Kept so existing calls that pass it keep working.

    Returns:
        A newly constructed optimizer instance.

    Raises:
        NotImplementedError: If ``optimizer`` is not one of the names above.
    """
    # The previous version computed `lr / sqrt(global_step)` from a local
    # variable created at 1 and never incremented, so the result always
    # equalled `lr`. It also handed the optimizers a constant tensor instead
    # of a plain number, and allocated a variable on every call, even for an
    # unknown optimizer name. `lr` is now passed through directly; the
    # effective learning rate is unchanged.
    # NOTE(review): if a real 1/sqrt(t) decay is wanted, it has to be supplied
    # as a learning-rate schedule that the optimizer evaluates at every step.
    if optimizer == 'SGDNesterov':
        return tf.keras.optimizers.SGD(learning_rate=lr, momentum=0.99, nesterov=True)
    elif optimizer == 'Adagrad':
        return tf.keras.optimizers.Adagrad(learning_rate=lr)
    elif optimizer == 'RMSProp':
        return tf.keras.optimizers.RMSprop(learning_rate=lr)
    elif optimizer == 'AdaDelta':
        return tf.keras.optimizers.Adadelta(learning_rate=lr)
    elif optimizer == 'Adam':
        return AdamOptimizer(learning_rate=lr)
    else:
        raise NotImplementedError(" [*] Optimizer is not included in list!")

In [19]:
# Scale pixel intensities by 1/255 (presumably mapping raw 0-255 values to [0, 1]).
# The guards keep this cell idempotent: running it a second time without
# reloading the data must not divide the already-scaled arrays again.
if train_images.max() > 1.0:
    train_images = train_images/255.0
if test_images.max() > 1.0:
    test_images = test_images/255.0

In [20]:
def getModel(dropout,rate=0.2):
    """Build the (uncompiled) fully connected classifier.

    The network flattens 28x28 inputs, applies two 1000-unit ReLU layers and
    ends in a 10-unit softmax layer. When ``dropout`` is truthy, a
    ``Dropout(rate)`` layer is placed in front of each of the three Dense
    layers.

    Args:
        dropout: Whether to insert the dropout layers.
        rate: Dropout rate used for every dropout layer.

    Returns:
        A ``keras.Sequential`` model.
    """
    stack = [keras.layers.Flatten(input_shape=(28,28))]

    # Hidden layers, each optionally preceded by dropout.
    for hidden_units in (1000, 1000):
        if dropout:
            stack.append(keras.layers.Dropout(rate))
        stack.append(keras.layers.Dense(hidden_units, activation="relu"))

    # Output layer, with the same optional dropout in front of it.
    if dropout:
        stack.append(keras.layers.Dropout(rate))
    stack.append(keras.layers.Dense(10, activation="softmax"))

    return keras.Sequential(stack)




In [23]:
def NN(lr):
    """Train and evaluate one model per (optimizer, dropout) combination.

    For each of the five optimizer names, first with and then without dropout,
    a new model is built, compiled with sparse categorical cross-entropy,
    fitted for 20 epochs on the training arrays and evaluated on the test
    arrays. The settings and the resulting test accuracy are printed.

    Args:
        lr: Learning rate passed to ``optimizer_fn`` for every run.
    """
    optimizer_names = ('SGDNesterov', 'Adagrad', 'RMSProp', 'AdaDelta', 'Adam')
    dropout_choices = (True, False)

    for optimizer_name in optimizer_names:
        for use_dropout in dropout_choices:
            print("Optimizer: ", optimizer_name)
            print("Dropout: ", use_dropout)

            # Build the network first, then its optimizer, then train it.
            net = getModel(use_dropout)
            net.compile(
                optimizer=optimizer_fn(optimizer_name, lr),
                loss='sparse_categorical_crossentropy',
                metrics=['accuracy'],
            )
            net.fit(train_images, train_labels, epochs=20)

            _loss, accuracy = net.evaluate(test_images, test_labels)
            print('Test accuracy:', accuracy)
            print("===========================================")

	

In [22]:
# Run the full optimizer x dropout sweep with a learning rate of 0.001.
NN(0.001)

Optimizer:  SGDNesterov
Dropout:  True
Test accuracy: 0.8282999992370605
Optimizer:  SGDNesterov
Dropout:  False
Test accuracy: 0.8503999710083008
Optimizer:  Adagrad
Dropout:  True
Test accuracy: 0.7516000270843506
Optimizer:  Adagrad
Dropout:  False
Test accuracy: 0.7815999984741211
Optimizer:  RMSProp
Dropout:  True
Test accuracy: 0.817799985408783
Optimizer:  RMSProp
Dropout:  False
Test accuracy: 0.8409000039100647
Optimizer:  AdaDelta
Dropout:  True
Test accuracy: 0.6089000105857849
Optimizer:  AdaDelta
Dropout:  False
Test accuracy: 0.6381999850273132
Optimizer:  Adam
Dropout:  True
Test accuracy: 0.8464999794960022
Optimizer:  Adam
Dropout:  False
Test accuracy: 0.8561000227928162


In [24]:
# Repeat the sweep with a ten times larger learning rate (0.01).
NN(0.01)

Optimizer:  SGDNesterov
Dropout:  True
Test accuracy: 0.717199981212616
Optimizer:  SGDNesterov
Dropout:  False
Test accuracy: 0.8169999718666077
Optimizer:  Adagrad
Dropout:  True
Test accuracy: 0.821399986743927
Optimizer:  Adagrad
Dropout:  False
Test accuracy: 0.8334000110626221
Optimizer:  RMSProp
Dropout:  True
Test accuracy: 0.7763000130653381
Optimizer:  RMSProp
Dropout:  False
Test accuracy: 0.8217999935150146
Optimizer:  AdaDelta
Dropout:  True
Test accuracy: 0.7541999816894531
Optimizer:  AdaDelta
Dropout:  False
Test accuracy: 0.7967000007629395
Optimizer:  Adam
Dropout:  True
Test accuracy: 0.7828999757766724
Optimizer:  Adam
Dropout:  False
Test accuracy: 0.8163999915122986
