In [32]:
import tensorflow as tf 
import numpy as np 
import matplotlib.pyplot as plt 
import copy 

In [33]:
# Ground-truth weight column vector (shape (2, 1)) that the synthetic labels
# are generated from; a perfect fit would recover these values.
W_opt = np.array([1.0, 2.0], dtype=np.float64).reshape(-1, 1)
print(W_opt)

[[1.]
 [2.]]


In [34]:
N = 500  # number of data points
delta = 2  # half-width of the sampling interval: features lie in [-delta, delta)

# Draw each feature independently and uniformly from [-delta, delta).
X1 = (np.random.rand(N) - 0.5) * 2 * delta
X2 = (np.random.rand(N) - 0.5) * 2 * delta

# Stack the two feature vectors as columns -> shape (N, 2). column_stack does
# this directly instead of building a Python list of tuples first.
X_train = np.column_stack((X1, X2))
print(f"X_train shape: {X_train.shape}")

X_train shape: (500, 2)


In [35]:
# Noisy linear score for every sample. The noise is drawn with size=(N, 1) so
# each sample gets its own perturbation; calling np.random.normal without a
# size returns a single scalar, which would shift all scores by the same amount.
y_value = X_train.dot(W_opt) + np.random.normal(0.0, 0.2, size=(N, 1))

# Binary label: 0 where the noisy score is negative, 1 everywhere else.
y_train = np.where(y_value < 0, 0.0, 1.0)

num_pos = int(np.count_nonzero(y_train == 1))
num_neg = int(np.count_nonzero(y_train == 0))
print("Number of positive labels: {}, number of negative labels: {}".format(num_pos, num_neg))

Number of positive labels: 249, number of negative labels: 251


In [36]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# Fetch MNIST through Keras; load_data() returns a (train, test) pair, each of
# which is an (images, labels) pair.
mnist = tf.keras.datasets.mnist
train_split, test_split = mnist.load_data()
X_train_full_img, y_train_full = train_split
X_test_img, y_test = test_split

In [37]:
# The leading axis indexes the samples, so its length is the dataset size.
num_train = len(X_train_full_img)
num_test = len(X_test_img)

# Flatten every image into one feature row and divide the values by 255.
X_train_full = X_train_full_img.reshape((num_train, -1)) / 255.0
X_test = X_test_img.reshape((num_test, -1)) / 255.0

print(X_train_full.shape, y_train_full.shape)
print(X_test.shape, y_test.shape)


(60000, 784) (60000,)
(10000, 784) (10000,)


In [82]:
class DNN:
    """Fully connected softmax classifier trained with a hand-written loop.

    The constructor builds tf.data pipelines from the notebook-level arrays
    X_train_full, y_train_full, X_test and y_test, so those must exist before
    an instance is created. Call build() before training, evaluating,
    predicting or saving.

    The objective is the sparse categorical cross-entropy plus ``alpha`` times
    the entropy of the predicted class distribution.
    """

    def __init__(self, n_classes=10, optimizer=None,
                 batch_size=32, epochs=20, alpha=1.0, beta=1.0):
        """Store hyper-parameters and prepare the train/valid/test pipelines.

        Args:
            n_classes: Number of units in the softmax output layer.
            optimizer: Keras optimizer used by the custom training loop. When
                None, a fresh SGD(learning_rate=0.001) is created for this
                instance.
            batch_size: Batch size of every dataset pipeline.
            epochs: Number of passes over the training set.
            alpha: Weight of the prediction-entropy term in the loss.
            beta: Weight intended for a max-margin loss; it is stored but not
                used by any loss term in this class.
        """
        # NOTE: this changes the Keras default float type globally, not only
        # for this instance. It is set before any layer or optimizer is built.
        tf.keras.backend.set_floatx('float64')

        # Honor the caller's value (it used to be hard-coded to 10).
        self.n_classes = n_classes
        self.batch_size = batch_size
        self.epochs = epochs
        # Create the default per instance: an optimizer object placed in the
        # signature is built once at definition time and shared by every DNN.
        if optimizer is None:
            optimizer = tf.keras.optimizers.SGD(learning_rate=0.001)
        self.optimizer = optimizer
        self.alpha = alpha  # hyper-parameter corresponding to entropy loss
        self.beta = beta  # hyper-parameter for the max-margin loss (unused here)

        # Full, *unshuffled* training data. The split below must happen before
        # shuffling: a shuffled dataset is reshuffled on every iteration, so
        # take()/skip() applied after shuffle() would let examples move between
        # the training and validation sets from one pass to the next.
        self.train_full_set = tf.data.Dataset.from_tensor_slices(
            (X_train_full, y_train_full))
        self.test_full_set = tf.data.Dataset.from_tensor_slices(
            (X_test, y_test))

        # First 50000 examples train (shuffled every epoch), next 10000 validate.
        self.train_set = (self.train_full_set.take(50000)
                          .shuffle(1000)
                          .batch(self.batch_size)
                          .prefetch(1))
        self.valid_set = (self.train_full_set.skip(50000).take(10000)
                          .batch(self.batch_size)
                          .prefetch(1))
        # A Dataset yields one element per iteration step, so it cannot be
        # unpacked as `X, y = dataset`; batch it and loop over it instead.
        self.test_set = self.test_full_set.batch(self.batch_size).prefetch(1)

    def build(self):
        """Create the network: two 20-unit ReLU layers and a softmax output."""
        self.model = Sequential([
            Dense(20, activation='relu'),
            Dense(20, activation='relu'),
            Dense(self.n_classes, activation='softmax'),
        ])
        # Training uses the custom loop below, but the compile settings are
        # what a reloaded model uses in evaluate(), so the loss must be a real
        # loss function (a metric object was passed here before).
        self.model.compile(
            optimizer=self.optimizer,
            loss=tf.keras.losses.SparseCategoricalCrossentropy(),
            metrics=[tf.keras.metrics.SparseCategoricalAccuracy()])

    def _loss_from_probs(self, y, pred_probs):
        """Per-example loss computed from already-predicted class probabilities."""
        # Cross entropy loss
        l1 = tf.keras.losses.sparse_categorical_crossentropy(y, pred_probs)
        # Prediction entropy loss. Only the log argument is clipped so that a
        # probability that underflowed to 0 contributes 0 instead of
        # 0 * -inf = NaN.
        safe_probs = tf.clip_by_value(
            pred_probs, tf.keras.backend.epsilon(), 1.0)
        l2 = tf.reduce_sum(- pred_probs * tf.math.log(safe_probs), axis=-1)

        assert (l1.shape == l2.shape)
        return l1 + self.alpha * l2

    def compute_loss(self, X, y):  # X is data batch, y is label batch
        """Return the per-example loss (cross-entropy + alpha * entropy)."""
        return self._loss_from_probs(y, self.model(X))

    def compute_grads(self, X, y):
        """Return gradients of the batch loss w.r.t. all trainable variables.

        The loss is a per-example vector; GradientTape.gradient differentiates
        the sum of its elements, so the gradients are summed (not averaged)
        over the batch.
        """
        with tf.GradientTape() as g:  # use gradient tape to compute gradients
            loss = self.compute_loss(X, y)
        # compute gradients w.r.t. all trainable variables
        return g.gradient(loss, self.model.trainable_variables)

    def train_one_batch(self, X, y):  # train in one batch
        """Run one optimizer step on a single batch."""
        grads = self.compute_grads(X, y)
        # the gradients will be applied according to optimizer for example SGD, Adam, and etc.
        self.optimizer.apply_gradients(
            zip(grads, self.model.trainable_variables))

    def evaluate(self, tf_dataset=None):
        """Return (mean loss, accuracy) over a batched dataset of (X, y) pairs.

        Raises:
            ValueError: If no dataset is given.
        """
        if tf_dataset is None:
            raise ValueError("evaluate() needs a batched tf.data.Dataset")
        dataset_loss = tf.keras.metrics.Mean()
        dataset_accuracy = tf.keras.metrics.SparseCategoricalAccuracy()
        for X, y in tf_dataset:
            # One forward pass per batch feeds both the loss and the accuracy.
            pred_probs = self.model(X, training=False)
            dataset_loss.update_state(self._loss_from_probs(y, pred_probs))
            dataset_accuracy.update_state(y, pred_probs)
        return dataset_loss.result(), dataset_accuracy.result()

    def save_model(self, path:str):
        """Save the underlying Keras model (not this wrapper) to ``path``."""
        self.model.save(path)

    def train_or_pred(self, training=True):
        """Train for ``self.epochs`` epochs, or print the test accuracy.

        Args:
            training: When True, run the training loop and print train and
                validation metrics after every epoch. When False, print the
                accuracy on the test set.
        """
        if not training:
            dataset_accuracy = tf.keras.metrics.SparseCategoricalAccuracy()
            # Go through the test pipeline in batches instead of pushing the
            # whole test array through the model in a single call.
            for X, y in self.test_set:
                dataset_accuracy.update_state(y, self.model(X, training=False))
            print(f"Test Accuracy: {float(dataset_accuracy.result()):.4f}")
            return

        for epoch in range(self.epochs):
            for X, y in self.train_set:  # use batch_index if you want to display something in iterations
                self.train_one_batch(X, y)
            train_loss, train_acc = self.evaluate(self.train_set)
            valid_loss, valid_acc = self.evaluate(self.valid_set)
            print('Epoch {}: train acc={:.4f}, train loss={:.4f} | valid acc={:.4f}, valid loss= {:.4f}'.format(
                epoch + 1,
                float(train_acc), float(train_loss),
                float(valid_acc), float(valid_loss)))


In [83]:
# Plain SGD with a small step size drives the custom training loop.
opt = tf.keras.optimizers.SGD(learning_rate=0.001)

# Create the wrapper and build its network; training happens in the next cell.
dnn = DNN(batch_size=64, epochs=10, optimizer=opt)
dnn.build()

In [84]:
# 1) train, 2) save the trained Keras model, 3) report accuracy on the test set.
dnn.train_or_pred(training=True)
dnn.save_model("mymodels/exe3dnn")
dnn.train_or_pred(training=False)

Epoch 1: train acc=0.9140, train loss=0.4867 | valid acc=0.9200, valid loss= 0.4450
Epoch 2: train acc=0.9353, train loss=0.3664 | valid acc=0.9396, valid loss= 0.3441
Epoch 3: train acc=0.9422, train loss=0.3293 | valid acc=0.9442, valid loss= 0.3134
Epoch 4: train acc=0.9476, train loss=0.2883 | valid acc=0.9491, valid loss= 0.2838
Epoch 5: train acc=0.9535, train loss=0.2587 | valid acc=0.9528, valid loss= 0.2667
Epoch 6: train acc=0.9541, train loss=0.2459 | valid acc=0.9538, valid loss= 0.2612
Epoch 7: train acc=0.9604, train loss=0.2176 | valid acc=0.9569, valid loss= 0.2410
Epoch 8: train acc=0.9610, train loss=0.2147 | valid acc=0.9561, valid loss= 0.2458
Epoch 9: train acc=0.9616, train loss=0.2140 | valid acc=0.9576, valid loss= 0.2432
Epoch 10: train acc=0.9622, train loss=0.2027 | valid acc=0.9568, valid loss= 0.2314
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.

In [None]:
# Reload the saved Keras model and evaluate it on the test data. Only the
# inner Keras model is restored here, not the DNN wrapper object.
saved_model_path = 'mymodels/exe3dnn'
new_model = tf.keras.models.load_model(saved_model_path)
new_model.evaluate(X_test, y_test)