In [11]:
import tensorflow as tf;
from tensorflow import python as tf_python
import numpy as np;
import time;
import os;
import sys;

# Floating-point type used for every tf.Variable and cast in this notebook.
DTYPE = tf.float32

# Double-precision machine epsilon; the loss adds it inside its log() terms
# so that log(0) is never evaluated.
EPS = np.finfo(np.float64).eps

In [12]:
# Ask TensorFlow which GPUs are visible; when at least one is present, enable
# on-demand memory growth on the first one.
physical_devices = tf.config.experimental.list_physical_devices('GPU')
if not physical_devices:
    print("Sorry, no GPU for you...")
else:
    print("We got a GPU")
    print(physical_devices)
    tf.config.experimental.set_memory_growth(physical_devices[0], True)

We got a GPU
[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


In [13]:
def and_data():
    """Build the two-input logical AND truth table.

    Returns:
        ``((x_train, y_train), (x_test, y_test))`` where both pairs hold the
        same four patterns as independent float32 arrays: inputs of shape
        (4, 2) and targets of shape (4, 1).
    """
    features = np.array([[0.0, 0.0], [0.0, 1.0], [1.0, 0.0], [1.0, 1.0]], dtype='float32')
    labels = np.array([[0.0], [0.0], [0.0], [1.0]], dtype='float32')
    train_pair = (features, labels)
    # The test pair carries the same values in separate arrays.
    test_pair = (features.copy(), labels.copy())
    return train_pair, test_pair
    
def or_data():
    """Build the two-input logical OR truth table.

    Returns:
        ``((x_train, y_train), (x_test, y_test))`` where both pairs hold the
        same four patterns as independent float32 arrays: inputs of shape
        (4, 2) and targets of shape (4, 1).
    """
    features = np.array([[0.0, 0.0], [0.0, 1.0], [1.0, 0.0], [1.0, 1.0]], dtype='float32')
    labels = np.array([[0.0], [1.0], [1.0], [1.0]], dtype='float32')
    train_pair = (features, labels)
    # The test pair carries the same values in separate arrays.
    test_pair = (features.copy(), labels.copy())
    return train_pair, test_pair
    
def xor_data():
    """Build the two-input logical XOR truth table.

    Returns:
        ``((x_train, y_train), (x_test, y_test))`` where both pairs hold the
        same four patterns as independent float32 arrays: inputs of shape
        (4, 2) and targets of shape (4, 1).
    """
    features = np.array([[0.0, 0.0], [0.0, 1.0], [1.0, 0.0], [1.0, 1.0]], dtype='float32')
    labels = np.array([[0.0], [1.0], [1.0], [0.0]], dtype='float32')
    train_pair = (features, labels)
    # The test pair carries the same values in separate arrays.
    test_pair = (features.copy(), labels.copy())
    return train_pair, test_pair

def xor_rolled_data():
    """Build an XOR data set whose targets are 0.2 / 0.8 instead of 0 / 1.

    Returns:
        ``((x_train, y_train), (x_test, y_test))`` where both pairs hold the
        same four patterns as independent float32 arrays: inputs of shape
        (4, 2) and targets of shape (4, 1).
    """
    features = np.array([[0.0, 0.0], [0.0, 1.0], [1.0, 0.0], [1.0, 1.0]], dtype='float32')
    labels = np.array([[0.2], [0.8], [0.8], [0.2]], dtype='float32')
    train_pair = (features, labels)
    # The test pair carries the same values in separate arrays.
    test_pair = (features.copy(), labels.copy())
    return train_pair, test_pair
    
def not_data():
    """Build a data set whose target is the negation of the first input (NOT x_0).

    Returns:
        ``((x_train, y_train), (x_test, y_test))`` where both pairs hold the
        same four patterns as independent float32 arrays: inputs of shape
        (4, 2) and targets of shape (4, 1).
    """
    features = np.array([[0.0, 0.0], [0.0, 1.0], [1.0, 0.0], [1.0, 1.0]], dtype='float32')
    # Target is 1 exactly when the first column of the input is 0.
    labels = np.array([[1.0], [1.0], [0.0], [0.0]], dtype='float32')
    train_pair = (features, labels)
    test_pair = (features.copy(), labels.copy())
    return train_pair, test_pair


def extended_and_data():
    """Build a three-input AND data set using 0.1 / 0.9 for low / high.

    Returns:
        ``((x_train, y_train), (x_test, y_test))`` where both pairs hold the
        same eight patterns as independent float32 arrays: inputs of shape
        (8, 3) and targets of shape (8, 1). Only the all-high row has a high
        (0.9) target.
    """
    levels = (0.1, 0.9)
    # Every combination of low/high over three inputs, last input varying fastest.
    rows = [[a, b, c] for a in levels for b in levels for c in levels]
    features = np.array(rows, dtype='float32')
    labels = np.array([[0.1]] * 7 + [[0.9]], dtype='float32')
    train_pair = (features, labels)
    test_pair = (features.copy(), labels.copy())
    return train_pair, test_pair
    
def extended_or_data():
    """Build a three-input OR data set using 0.1 / 0.9 for low / high.

    Returns:
        ``((x_train, y_train), (x_test, y_test))`` where both pairs hold the
        same eight patterns as independent float32 arrays: inputs of shape
        (8, 3) and targets of shape (8, 1). Only the all-low row has a low
        (0.1) target.
    """
    levels = (0.1, 0.9)
    # Every combination of low/high over three inputs, last input varying fastest.
    rows = [[a, b, c] for a in levels for b in levels for c in levels]
    features = np.array(rows, dtype='float32')
    labels = np.array([[0.1]] + [[0.9]] * 7, dtype='float32')
    train_pair = (features, labels)
    test_pair = (features.copy(), labels.copy())
    return train_pair, test_pair
    
def extended_xor_data():
    """Build a small three-input data set labelled "extended XOR".

    Returns:
        ``((x_train, y_train), (x_test, y_test))`` where both pairs hold the
        same three patterns as independent float32 arrays: inputs of shape
        (3, 3) and targets of shape (3, 1).
    """
    # NOTE(review): only three of the eight possible low/high patterns are
    # listed here - confirm whether the remaining rows were left out on purpose.
    features = np.array([[0.1, 0.1, 0.1], [0.1, 0.1, 0.9], [0.1, 0.9, 0.1]], dtype='float32')
    labels = np.array([[0.1], [0.9], [0.9]], dtype='float32')
    train_pair = (features, labels)
    test_pair = (features.copy(), labels.copy())
    return train_pair, test_pair


def load_data( name ):
    """Return the toy data set registered under ``name``.

    Args:
        name: One of "and", "or", "xor", "not", "xor_rolled", "extended_and",
            "extended_or" or "extended_xor".

    Returns:
        Whatever the matching ``*_data()`` builder returns, i.e.
        ``((x_train, y_train), (x_test, y_test))``.

    Raises:
        ValueError: If ``name`` is not one of the known data set names.
    """
    known_names = ( "and", "or", "xor", "not", "xor_rolled",
                    "extended_and", "extended_or", "extended_xor" )
    # Fail loudly on a typo instead of returning None, which would only
    # surface later as an unrelated tuple-unpacking error in the caller.
    if name not in known_names:
        raise ValueError(
            "Unknown data set %r; expected one of: %s" % (name, ", ".join(known_names))
        )

    if name == "and":
        return and_data()
    if name == "or":
        return or_data()
    if name == "xor":
        return xor_data()
    if name == "not":
        return not_data()
    if name == "xor_rolled":
        return xor_rolled_data()
    if name == "extended_and":
        return extended_and_data()
    if name == "extended_or":
        return extended_or_data()
    return extended_xor_data()

In [14]:
import abc

class AbstractLayer(metaclass=abc.ABCMeta):
    """Interface that every network component (layer or weight matrix) implements.

    All five methods are abstract, so a subclass must override each of them
    before it can be instantiated. The default bodies only raise
    ``NotImplementedError``.
    """

    @abc.abstractmethod
    def output(self, input):
        """Return this component's output for the network input ``input``."""
        raise NotImplementedError("Must override output method")

    @abc.abstractmethod
    def update(self, loss, tape, optimizer):
        """Update this component's parameters from ``loss`` using ``tape`` and ``optimizer``."""
        raise NotImplementedError("Must override update method")

    @abc.abstractmethod
    def gradientDescent_update(self, loss, tape):
        """Apply one gradient-descent step derived from ``loss`` and ``tape``."""
        raise NotImplementedError("Must override gradientDescent_update method")

    @abc.abstractmethod
    def get_gradient(self, loss, tape):
        """Return the gradient of ``loss`` for this component, as recorded on ``tape``."""
        raise NotImplementedError("Must override get_gradient method")

    @abc.abstractmethod
    def reset(self):
        """Re-initialise this component's parameters."""
        raise NotImplementedError("Must override reset method")

In [15]:
import tensorflow as tf
from typing import Callable, Dict

class BaseLayer(AbstractLayer):
    """A layer of neurons: a trainable bias vector plus a transfer function.

    The layer does not own incoming weights. ``inputs`` and ``outputs`` start
    as ``None`` and are filled in by ``WeightLayer.__init__`` when the layer
    is wired as a destination / source.
    """

    def __init__(self, params: Dict, name: str, node_no: int, transfer: Callable):
        """Create the layer and its bias variable.

        Args:
            params: Hyper-parameter dictionary. This class reads 'BIAS_INIT'
                (called as ``BIAS_INIT([node_no], dtype=DTYPE)``),
                'LEARNING_RATE' and 'ALOPEX_LEARNING_RATE'.
            name: Layer name; the bias variable is named ``B_<name>``.
            node_no: Number of neurons, i.e. the length of the bias vector.
            transfer: Activation applied to the biased pre-activation.
        """
        self.params = params
        self.name = name
        self.node_no = node_no
        self.transfer = transfer
        self.inputs = None
        self.outputs = None
        self.biases = tf.Variable(
            self.params['BIAS_INIT']([self.node_no], dtype=DTYPE),
            name='B_%s' % (self.name,),
            trainable=True,
        )

    def __rshift__(self, other):
        """Print ``other`` and return ``None``; subclasses override this to build networks."""
        print(other)

    @tf.function
    def output(self, input):
        """Compute the activation values of the neurons in this Layer.

        Args:
            input: Network input, forwarded to ``self.inputs.output``.

        Returns:
            ``transfer(bias_add(self.inputs.output(input), biases))``.
        """
        # Run the upstream forward pass once and reuse it for both the shape
        # trace below and the actual computation.
        pre_activation = self.inputs.output(input)
        # Plain Python print: inside a tf.function it only runs while tracing.
        print(self.name, pre_activation.shape, self.biases.shape)
        return self.transfer(tf.nn.bias_add(pre_activation, self.biases))

    def gradientDescent_update(self, loss, tape):
        """Take one gradient-descent step on the biases.

        Subtracts ``params['LEARNING_RATE'] * d(loss)/d(biases)`` in place.
        """
        gradient = self.get_gradient(loss, tape)
        self.biases.assign_sub(self.params['LEARNING_RATE'] * gradient)

    def get_gradient(self, loss, tape):
        """Return ``tape.gradient(loss, self.biases)``."""
        return tape.gradient(loss, self.biases)

    def update(self, loss, tape, optimizer):
        """Let ``optimizer`` apply the bias gradient via ``apply_gradients``."""
        gradient = self.get_gradient(loss, tape)
        optimizer.apply_gradients(zip([gradient], [self.biases]))

    def alopex_update(self, optimizer):
        """Add the Alopex step for the biases.

        The step is ``params['ALOPEX_LEARNING_RATE']`` times the entry of
        ``optimizer.update_x`` stored under the bias variable's name, cast to
        ``DTYPE``.
        """
        step = tf.cast(optimizer.update_x[self.biases.name], dtype=DTYPE)
        self.biases.assign_add(self.params['ALOPEX_LEARNING_RATE'] * step)

    def reset(self):
        """Re-draw the biases from ``params['BIAS_INIT']``."""
        self.biases.assign(self.params['BIAS_INIT']([self.node_no], dtype=DTYPE))

In [16]:
class Layer(BaseLayer):
    """A BaseLayer that can start a network: ``layer_a >> layer_b`` yields an ``NN``."""

    def __init__(self, params: Dict, name: str, node_no: int, transfer: Callable):
        """Forward all arguments unchanged to ``BaseLayer.__init__``."""
        super().__init__(params=params, name=name, node_no=node_no, transfer=transfer)

    def __rshift__(self, other):
        """Return a new ``NN`` with this layer first and ``other`` second."""
        network = NN(self, other)
        return network

In [17]:
class InputLayer(Layer):
    """The first layer of a network: it passes the inputs through unchanged.

    It has no biases and no transfer function, and every training hook is a
    no-op.
    """

    def __init__(self, params: Dict, name: str, node_no: int):
        """Store the layer description without creating any variables.

        Args:
            params: Hyper-parameter dictionary shared with the other layers.
            name: Layer name.
            node_no: Number of input units.
        """
        # BaseLayer.__init__ is intentionally not called, because it would
        # create a bias variable this layer does not use.
        self.params = params
        self.name = name
        self.node_no = node_no
        # Define the connection slots up front, as BaseLayer does, so an
        # unconnected input layer can be inspected without AttributeError.
        # WeightLayer.__init__ overwrites them when the layer is wired in.
        self.inputs = None
        self.outputs = None

    @tf.function
    def output(self, inputs):
        """Return the inputs as the output of this layer."""
        return inputs

    def update(self, loss, tape, optimizer):
        """Do nothing: the input layer has no parameters to update."""
        return None

    def gradientDescent_update(self, loss, tape):
        """Do nothing: the input layer has no parameters to update."""
        return None

    def get_gradient(self, loss, tape):
        """Return ``None``: the input layer has no parameters."""
        return None

    def alopex_update(self, optimizer):
        """Do nothing: the input layer has no parameters to update."""
        return None

    def reset(self):
        """Do nothing: the input layer has no parameters to reset."""
        return None

In [18]:
class WeightLayer(BaseLayer):
    """The trainable weight matrix connecting a source layer to a destination layer.

    Creating a WeightLayer also wires the two layers together by setting
    ``src.outputs`` and ``dest.inputs`` to this object.
    """

    def __init__(self, params: Dict, src: Layer, dest: Layer):
        """Create the ``(src.node_no, dest.node_no)`` weight variable and link the layers.

        Args:
            params: Hyper-parameter dictionary. This class reads 'WEIGHT_INIT'
                (called as ``WEIGHT_INIT([rows, cols], dtype=DTYPE)``),
                'LEARNING_RATE' and 'ALOPEX_LEARNING_RATE'.
            src: Layer whose output feeds this weight matrix.
            dest: Layer that receives this weight matrix's output.
        """
        # BaseLayer.__init__ is not called: a weight matrix has no biases or
        # transfer function of its own.
        self.params = params
        self.src = src
        self.dest = dest
        self.name = "W_%s_%s" % (src.name, dest.name)
        self.weights = tf.Variable(
            self.params['WEIGHT_INIT']([src.node_no, dest.node_no], dtype=DTYPE),
            name=self.name,
            trainable=True,
        )

        self.src.outputs = self
        self.dest.inputs = self

    @tf.function
    def output(self, inputs):
        """Return ``matmul(src.output(inputs), weights)``."""
        # Run the source layer once and reuse the result for the shape trace
        # and for the product.
        src_activation = self.src.output(inputs)
        # Plain Python print: inside a tf.function it only runs while tracing.
        print(self.name, self.src.name, self.dest.name, src_activation.shape, self.weights.shape)
        return tf.matmul(src_activation, self.weights)

    def get_gradient(self, loss, tape):
        """Return ``tape.gradient(loss, self.weights)``."""
        return tape.gradient(loss, self.weights)

    def gradientDescent_update(self, loss, tape):
        """Take one gradient-descent step on the weights.

        Subtracts ``params['LEARNING_RATE'] * d(loss)/d(weights)`` in place.
        """
        gradient = self.get_gradient(loss, tape)
        self.weights.assign_sub(self.params['LEARNING_RATE'] * gradient)

    def update(self, loss, tape, optimizer):
        """Let ``optimizer`` apply the weight gradient via ``apply_gradients``."""
        gradient = self.get_gradient(loss, tape)
        optimizer.apply_gradients(zip([gradient], [self.weights]))

    def alopex_update(self, optimizer):
        """Add the Alopex step for the weights.

        The step is ``params['ALOPEX_LEARNING_RATE']`` times the entry of
        ``optimizer.update_x`` stored under the weight variable's name, cast
        to ``DTYPE``.
        """
        step = tf.cast(optimizer.update_x[self.weights.name], dtype=DTYPE)
        self.weights.assign_add(self.params['ALOPEX_LEARNING_RATE'] * step)

    def reset(self):
        """Re-draw the weights from ``params['WEIGHT_INIT']``."""
        self.weights.assign(
            self.params['WEIGHT_INIT']([self.src.node_no, self.dest.node_no], dtype=DTYPE)
        )

In [19]:
class NN:
    """
    A feed-forward neural network assembled with the ``>>`` operator.

    ``layer_a >> layer_b`` creates the network; further ``>>`` applications
    append a Layer, install a loss function (any callable) or record the
    optimizer name (a string).
    """

    def __init__(self, layer_0, layer_1):
        """
        Construct a network with an input layer, layer_0, and one more
        layer, layer_1. The hyper-parameters are taken from layer_0.params.
        """
        self.params = layer_0.params
        self.layers = [layer_0, layer_1]
        self.input_layer = self.layers[0]
        self.weights = [WeightLayer(self.params, layer_0, layer_1)]
        self.output_layer = self.layers[-1]
        self.trainable_variables = [self.weights[0].weights, self.output_layer.biases]
        self.loss_fn = None
        self.optimizer = None

    def __rshift__(self, other):
        """
        Extend the network with ``other`` and return the network itself.

        * a Layer is appended and connected to the previous last layer;
        * a string is stored as the optimizer name;
        * any other callable is stored as the loss function.

        Raises TypeError for anything else.
        """
        if isinstance(other, Layer):
            self.layers.append(other)
            self.weights.append(WeightLayer(self.params, self.layers[-2], other))
            self.output_layer = self.layers[-1]
            self.trainable_variables.extend([self.weights[-1].weights, self.output_layer.biases])
            return self

        if isinstance(other, str):
            self.optimizer = other
            return self

        # Accept any callable as the loss rather than testing against
        # TensorFlow's internal Function class, whose import path is not
        # part of the public API.
        if callable(other):
            self.loss_fn = other
            return self

        raise TypeError(f"Unsupported type: {type(other)}")

    @tf.function
    def confusion(self, targets, inputs):
        """
        Return the confusion matrix between ``targets`` and the rounded
        network outputs for ``inputs`` (outputs are flattened to length
        ``targets.shape[0]``).
        """
        predictions = tf.reshape(tf.math.round(self.output(inputs)), [targets.shape[0]])
        return tf.math.confusion_matrix(targets, predictions)

    @tf.function
    def output(self, inputs):
        """
        Compute the output values generated by the NN for a given input.
        """
        return self.output_layer.output(inputs)

    def reset(self):
        """
        Reset all the weights and biases.
        """
        for weight in self.weights:
            weight.reset()

        for layer in self.layers:
            layer.reset()

    @tf.function
    def loss(self, target, inputs):
        """
        Apply the loss function to compute the loss.
        """
        return self.loss_fn(target, self.output(inputs))

    @staticmethod
    def _fit_to_slot(values, slot_shape):
        """
        Return ``values`` in a shape NumPy can broadcast into ``slot_shape``.

        If ``values`` already broadcasts it is returned unchanged; otherwise
        trailing length-1 axes are appended, e.g. (n,) becomes (n, 1).
        """
        values = np.asarray(values)
        try:
            np.broadcast_to(values, slot_shape)
        except ValueError:
            missing_axes = len(slot_shape) - values.ndim
            return values.reshape(values.shape + (1,) * missing_axes)
        return values

    def train(self, target, inputs, epoch, mini_batch):
        """
        Propagate the inputs through the network and
        update the weights and layers (biases) based on
        the target values.

        The gradients applied are also written to
        ``self.logs['weight_gradients']`` and ``self.logs['bias_gradients']``
        at ``[epoch][mini_batch]``, so ``train_epochs`` must have created
        ``self.logs`` first.
        """
        with tf.GradientTape(persistent=True) as tape:
            loss = self.loss(target, inputs)

        for weight in self.weights:
            self.logs["weight_gradients"][epoch][mini_batch] = weight.get_gradient(loss, tape).numpy()
            weight.gradientDescent_update(loss, tape)

        for layer in self.layers[1:]:
            # Log this layer's own bias gradient. It is a 1-D vector while
            # the log slot is 2-D, so reshape it when it would not broadcast
            # as-is.
            bias_gradient = layer.get_gradient(loss, tape).numpy()
            slot_shape = self.logs["bias_gradients"][epoch][mini_batch].shape
            self.logs["bias_gradients"][epoch][mini_batch] = self._fit_to_slot(bias_gradient, slot_shape)
            layer.gradientDescent_update(loss, tape)

        # A persistent tape keeps its resources until it is dropped.
        del tape

    def train_epochs(self, y_train, x_train, y_test, x_test, show=False):
        """
        Perform multiple epochs of training based on the values in
        ``self.params`` and return the collected logs.

        Runs ``MAX_EPOCHS + 1`` passes; the last pass only evaluates and logs,
        it does not train. Each pass shuffles the training data and walks
        over ``MINI_BATCH`` slices of ``TRAINING_SIZE // MINI_BATCH`` rows.

        All layers share one log slot per (epoch, mini-batch), so for the
        per-layer entries ('weights', 'biases', 'weight_output',
        'layer_output' and the gradient logs) each slot ends up holding the
        values of the last layer written, broadcast to the slot shape.

        Args:
            y_train, x_train: Training targets and inputs.
            y_test, x_test: Evaluation targets and inputs.
            show: When true, write one line with the training and testing
                loss to stdout after every epoch.

        Returns:
            ``self.logs``: a dict of NumPy arrays (float32, except the two
            wall-clock timestamp arrays which are float64) plus the scalar
            entries 'before_time', 'after_time', 'total_time' and the final
            'test_conf' confusion matrix.
        """
        params = self.params
        n_epochs = params['MAX_EPOCHS'] + 1
        n_batches = params['MINI_BATCH']
        n_inputs = params['N_INPUTS']
        n_hidden = params['N_HIDDEN']
        n_outputs = params['N_OUTPUTS']
        n_train = len(x_train)
        per_batch = (n_epochs, n_batches)

        # Define the shapes for each log entry
        log_shapes = {
            "train_losses": per_batch,
            "train_corrects": per_batch,
            "test_losses": (n_epochs,),
            "test_corrects": (n_epochs,),
            "weights": per_batch + (n_inputs, n_hidden),
            "weight_output": per_batch + (n_train, n_inputs),
            "biases": per_batch + (n_inputs,),
            "layer_output": per_batch + (n_train, n_inputs),
            "nn_output": per_batch + (n_train, n_outputs),
            "mini_batch_before_time": per_batch,
            "mini_batch_after_time": per_batch,
            "mini_batch_total_time": per_batch,
            "weight_gradients": per_batch + (n_inputs, n_hidden),
            "bias_gradients": per_batch + (n_hidden, n_outputs),
        }

        # Epoch timestamps from time.time() exceed float32's exact range, so
        # float32 would round them to a granularity of many seconds; keep
        # those two arrays in float64.
        timestamp_keys = ("mini_batch_before_time", "mini_batch_after_time")
        self.logs = {
            key: np.zeros(shape, dtype=np.float64 if key in timestamp_keys else np.float32)
            for key, shape in log_shapes.items()
        }

        MB_SIZE = params['TRAINING_SIZE'] // n_batches
        self.logs['before_time'] = time.time()

        for epoch in range(n_epochs):
            # Evaluate on the test data before this epoch's training.
            self.logs['test_losses'][epoch] = self.loss(y_test, x_test)
            test_conf = self.confusion(y_test, x_test)
            self.logs['test_corrects'][epoch] = tf.linalg.trace(test_conf).numpy()

            x, y = double_shuffle(x_train, y_train)

            for mini_batch in range(n_batches):
                started = time.time()
                self.logs['mini_batch_before_time'][epoch][mini_batch] = started

                first = MB_SIZE * mini_batch
                last = MB_SIZE * (mini_batch + 1)
                mbx = x[first:last]
                mby = y[first:last]

                # Outputs are logged on the unshuffled rows so that they stay
                # comparable between epochs.
                unshuffled_x = x_train[first:last]

                train_loss = self.loss(mby, mbx)
                self.logs['train_losses'][epoch][mini_batch] = train_loss

                conf = self.confusion(mby, mbx)
                self.logs['train_corrects'][epoch][mini_batch] = tf.linalg.trace(conf)

                for weight, layer in zip(self.weights, self.layers[1:]):
                    self.logs['weights'][epoch][mini_batch] = weight.weights.numpy()
                    self.logs['weight_output'][epoch][mini_batch] = weight.output(unshuffled_x).numpy()

                    self.logs['biases'][epoch][mini_batch] = layer.biases.numpy()
                    self.logs['layer_output'][epoch][mini_batch] = layer.output(unshuffled_x).numpy()

                self.logs['nn_output'][epoch][mini_batch] = self.output(unshuffled_x).numpy()

                # The extra final pass only records the end state.
                if epoch != params['MAX_EPOCHS']:
                    self.train(mby, mbx, epoch, mini_batch)

                finished = time.time()
                self.logs['mini_batch_after_time'][epoch][mini_batch] = finished
                # Subtract the full-precision Python floats, not the stored values.
                self.logs['mini_batch_total_time'][epoch][mini_batch] = finished - started

            if show:
                sys.stdout.write("%d,    Training Loss: %lf,    Testing Loss: %lf,\n" % (epoch, self.loss(y_train, x_train), self.loss(y_test, x_test)))

        self.logs['after_time'] = time.time()
        self.logs['test_conf'] = test_conf
        self.logs['total_time'] = self.logs['after_time'] - self.logs['before_time']

        return self.logs

@tf.function
def preprocess(input):
    """
    Flattens every example to a single feature vector and rescales it.

    The first axis of ``input`` is kept, all remaining axes are merged into
    one, the result is cast to DTYPE and divided by 256.0.
    """
    batch_size = input.shape[0]
    flattened = tf.reshape(input, (batch_size, -1))
    as_float = tf.cast(flattened, DTYPE)
    return as_float / 256.0

@tf.function
def one_hot(y_train, depth=2):
    """
    Converts labels into one-hot vectors.

    Args:
        y_train: Integer class labels.
        depth: Number of classes, i.e. the length of each one-hot vector.
            Defaults to 2.

    Returns:
        The result of ``tf.one_hot(y_train, depth)``.
    """
    return tf.one_hot(y_train, depth)

@tf.function
def un_hot(inputs):
    """
    Maps each row of ``inputs`` to the index of its largest entry
    (argmax along axis 1), turning one-hot rows back into labels.
    """
    labels = tf.math.argmax(inputs, axis=1)
    return labels

def double_shuffle(inputs, targets):
    """
    Applies one random permutation to both ``inputs`` and ``targets``.

    Returns the two permuted arrays as NumPy arrays, so rows that belonged
    together before the shuffle still do afterwards. Raises ValueError when
    the first dimensions differ.
    """
    n_examples = inputs.shape[0]
    if n_examples != targets.shape[0]:
        raise ValueError("Inputs and targets must have the same first dimension!")

    permutation = tf.random.shuffle(tf.range(n_examples))
    shuffled_inputs = tf.gather(inputs, permutation).numpy()
    shuffled_targets = tf.gather(targets, permutation).numpy()
    return shuffled_inputs, shuffled_targets

def gpu_utilization():
    """
    Returns the ``.gpu`` utilization value reported for every entry of ``handles``.
    """
    # NOTE(review): `nv` and `handles` are not defined in the visible part of
    # this notebook; presumably an NVML binding and its device handles -
    # confirm they are set up before this is called.
    rates = []
    for handle in handles:
        rates.append(nv.nvmlDeviceGetUtilizationRates(handle).gpu)
    return rates

In [20]:
# Load the XOR toy problem (training and test data are the same four patterns)
(x_train, y_train), (x_test, y_test) = load_data("xor")

# Activation functions
sigmoid = tf.nn.sigmoid
softmax = tf.nn.softmax

# Initializers: Glorot-uniform weights (unseeded here) and all-zero biases
vs_init = tf.initializers.GlorotUniform()
zeros_init = tf.zeros
DTYPE = tf.float32

# Step size for plain gradient descent
learning_rate = 0.5

# Hyper-parameters shared by every layer and by the training loop
PARAMETERS1 = {
    'WEIGHT_INIT_NAME': 'vs_init',
    'LOGFILE_NAME': 'Log_trial:',
    'WEIGHT_INIT': vs_init,
    'BIAS_INIT': zeros_init,
    # network size
    'N_INPUTS': 2,
    'N_HIDDEN': 2,
    'N_OUTPUTS': 1,
    'LEARNING_RATE': learning_rate,
    # maximum number of epochs to run
    'MAX_EPOCHS': 1000,
    # number of training / testing examples
    'TRAINING_SIZE': x_train.shape[0],
    'TEST_SIZE': x_test.shape[0],
    'MAX_EXPERIMENTS': 1,
    # 1 mini-batch per epoch == full batch
    'MINI_BATCH': 1,
    'DTYPE': DTYPE,
}

# Defined at module level with no device scope: a scope around a `def` does
# not control where later calls execute. If the loss must run on a specific
# device, wrap the call site in tf.device instead.
@tf.function
def loss_function( y_target, y_pred ):
    """Mean binary cross-entropy between ``y_target`` and ``y_pred``.

    EPS is added inside both logarithms so that a prediction of exactly 0 or
    1 does not evaluate log(0).
    """
    positive_term = y_target * tf.math.log(y_pred + EPS)
    negative_term = (1 - y_target) * tf.math.log(1 - y_pred + EPS)
    return -tf.reduce_mean( positive_term + negative_term )

# Run MAX_EXPERIMENTS independent trials; each trial seeds the weight
# initializer with its own trial index.
for trial in range(PARAMETERS1['MAX_EXPERIMENTS']):
    vs_init = tf.initializers.GlorotUniform(seed=trial)
    PARAMETERS1['WEIGHT_INIT'] = vs_init

    # Layers: input -> sigmoid hidden -> sigmoid output
    input_l = InputLayer(PARAMETERS1, "input", PARAMETERS1['N_INPUTS'])
    hidden_l = Layer(PARAMETERS1, "hidden", PARAMETERS1['N_HIDDEN'], sigmoid)
    output_l = Layer(PARAMETERS1, "output", PARAMETERS1['N_OUTPUTS'], sigmoid)

    learning_algorithm = "GD"
    algorithm_name = "GD"

    # Wire the layers into a network, then attach the loss and the name of
    # the learning algorithm.
    nn = input_l >> hidden_l >> output_l
    nn = nn >> loss_function >> learning_algorithm

    # Train, keep the logs, then re-initialise the weights and biases
    logs = nn.train_epochs(y_train, x_train, y_test, x_test, show=True)
    nn.reset()
    

W_hidden_output <__main__.WeightLayer object at 0x7fd97c780250>
W_input_hidden <__main__.WeightLayer object at 0x7fd92b952610>
W_input_hidden input hidden (4, 2) (2, 2)
W_hidden_output hidden output (4, 2) (2, 1)


0,    Training Loss: 0.693021,    Testing Loss: 0.693021,
1,    Training Loss: 0.692913,    Testing Loss: 0.692913,
2,    Training Loss: 0.692827,    Testing Loss: 0.692827,
3,    Training Loss: 0.692754,    Testing Loss: 0.692754,
4,    Training Loss: 0.692689,    Testing Loss: 0.692689,
5,    Training Loss: 0.692630,    Testing Loss: 0.692630,
6,    Training Loss: 0.692574,    Testing Loss: 0.692574,
7,    Training Loss: 0.692519,    Testing Loss: 0.692519,
8,    Training Loss: 0.692467,    Testing Loss: 0.692467,
9,    Training Loss: 0.692415,    Testing Loss: 0.692415,
10,    Training Loss: 0.692363,    Testing Loss: 0.692363,
11,    Training Loss: 0.692312,    Testing Loss: 0.692312,
12,    Training Loss: 0.692262,    Testing Loss: 0.692262,
13,    Training Loss: 0.692211,    Testing Loss: 0.692211,
14,    Training Loss: 0.692161,    Testing Loss: 0.692161,
15,    Training Loss: 0.692111,    Testing Loss: 0.692111,
16,    Training Loss: 0.692061,    Testing Loss: 0.692061,
17,    