In [1]:
import numpy as np;
from tqdm import trange;

In [2]:
print( np.__version__ );

1.20.1


In [3]:
def xor_data():
    input_array = np.array( [ [ 0.0, 0.0 ], [ 0.0, 1.0 ], [ 1.0, 0.0 ], [ 1.0, 1.0 ] ] );
    output_array = np.array( [ [ 0.0 ], [ 1.0 ], [ 1.0 ], [ 0.0 ] ] );
    return ( input_array.astype('float32'), output_array.astype('float32') ),\
( input_array.astype('float32'), output_array.astype('float32') );

def load_data( name ):
    if name == "xor":
        return xor_data();

In [4]:
class Loss( object ):
    def __init__( self, name : str ):
        self.name = name;

    def __call__( self, y_pred, y_true, deriv=False ):
        if self.name == "mse":
            return np.mean( ( y_pred - y_true ) ** 2 );
    
        elif self.name == "cross_entropy" and deriv == False:
            return -np.mean( y_true * np.log( y_pred ) + ( 1 - y_true ) * np.log( 1 - y_pred ) );
        
        else:
            raise ValueError( "Invalid loss function" );

In [5]:
class GradientDescent(object):
    """
    Gradient Descent optimizer.
    """
    def __init__(self, parameters: dict):
        self.parameters = parameters;
        self.name = "Gradient Descent";
    
    def minimize( self, trainable_parameters, loss_obj: Loss ):
        """
        """

In [6]:
class BaseLayer( object ):
    """
    Base class for the Layer class.
    """
    def __init__( self, hyperparams : dict, name : str  ):
        """
        Constructor for the Base Layer class.

        args:
            params : dict
            node_no : int

        returns:
            None

        attributes:
            self.params : dict
            self.node_no : int

        raises:
            None

        """
        self.hyperparams = hyperparams;
        self.name = name;

In [7]:
class InputLayer( BaseLayer ):
    """
    Input layer for the perceptron.
    """
    def __init__( self, hyperparams : dict, name: str  ):
        """
        Constructor for the Input Layer class.

        args:
            params : dict
            node_no : int

        returns:
            None

        attributes:
            self.bias : array

        raises:
            None
        """
        super( InputLayer, self ).__init__( hyperparams, name );
        self.node_no = hyperparams['input_units'];
    
    def output( self, inputs ):
        """
        Mirror the inputs.

        args:
            inputs : array

        returns:
            array

        attributes:
            None

        raises:
            None
        """
        return inputs;

    def __rshift__( self, other ):
        """
        Overwrite the right shift operator to connect the layers.

        args:
            other : object

        returns:
            None

        attributes:
            None

        raises:
            None
        """
        return NeuralNetwork( self, other );

In [8]:
class Layer( BaseLayer ):
    """
    Layer class for the perceptron.
    """
    def __init__( self, hyperparams : dict, name: str, transfer: str  ):
        """
        Constructor for the Layer class.

        args:
            name : str
            params : dict
            node_no : int
            transfer : str

        returns:
            None

        attributes:
            self.name : str
            self.transfer : str
            self.inputs : object
            self.outputs : object

        raises:
            None
        """
        super( Layer, self ).__init__( hyperparams, name );
        self.transfer = transfer;

        self.inputs = None;
        self.outputs = None;
    
        if "hidden" in name:
            self.node_no = hyperparams['hidden_units'];
        else:
            self.node_no = hyperparams['output_units'];
        
        #create self.bias to be an array of size node_no with random values from a normal distribution between -1 and 1
        self.bias = np.random.uniform( -1, 1, self.node_no );
        
        
    def __rshift__( self, other ):
        """
        Overwrite the right shift operator to connect the layers.

        args:
            other : object

        returns:
            None

        attributes:
            None

        raises:
            None
        """
        return NeuralNetwork( self, other );
    
    def transfer_fx( self, inputs ):
        """
        Transfer function for the layer.

        args:
            inputs : array

        returns:
            array

        attributes:
            None

        raises:
            None
        """
        # implement the sigmoid transfer function
        return 1 / ( 1 + np.exp( -inputs ) );


    def output( self, inputs ):
        """
        Calculate the output with the activation function and inputs.

        args:
            inputs : array

        returns:
            array

        attributes:
            self.outputs : array

        raises:
            None
        """
        
        #self.activated = self.transfer_fx( self.inputs.output( inputs ) + self.bias );
        self.activated = self.transfer_fx( inputs + self.bias );
        return self.activated;

In [9]:
class WeightLayer( BaseLayer):
    """
    Weight layer for the perceptron.
    """
    
    def __init__( self, src : Layer, dest : Layer ) -> None:
        """
        Constructor for the weight layer.

        args:
            params : dict
            src : Layer
            dest : Layer

        returns:
            None

        attributes:
            self.src : Layer
            self.dest : Layer
            self.input_size : tuple
            self.output_size : tuple
            self.name : str
            self.weights : array

        raises: 
            None
        """
        self.src = src;
        self.dest = dest;

        #print( f"src: {src.node_no}, dest: {dest.node_no}" );
    
        #self.weights = np.random.randn( self.src.node_no, self.dest.node_no );
        #create self.weights to be a matrix of size src.node_no x dest.node_no with random values from a uniform distribution between -1 and 1
        self.weights = np.random.uniform( -1, 1, ( self.src.node_no, self.dest.node_no ) );
        self.name    = "W_%s_%s_layer" % ( self.src.name, self.dest.name );

        self.src.outputs = self;
        self.dest.inputs = self;


    def output( self, inputs ):
        """
        Matrix multiplication between the inputs and the weights.

        args:
            inputs : array

        returns:
            array

        attributes:
            None

        raises:
            None
        """        
        return self.src.output( inputs ) @ self.weights;

In [41]:
class NeuralNetwork( object ):
    """
    This class respresents a Neural Networks.
    """
    
    def __init__( self, layer0, layer1 ):
        """
        Constructor for the Neural Network class.
        Creates a network with an input layer, layer0 and an output layer, layer1.

        args:
            layer0 : object
            layer1 : object

        returns:
            None

        attributes:
            self.hyperparams : dict
            self.layers : list
            self.input_layer : object
            self.output_layer : object
            self.weights : list
            self.loss_fn : object
            self.optimizer : object

        raises:
            None 
        """
        # hyperparameters dictionary
        self.hyperparams = layer0.hyperparams;

        # layers
        self.layers = [ layer0, layer1 ];
        self.input_layer = layer0;
        self.output_layer = self.layers[-1];

        # weights
        self.weights = [ WeightLayer( layer0, layer1 ) ];

        # loss function
        self.loss_fn = None;

        # optimizer
        self.optimizer = None;

        # trainable parameters
        self.trainable_parameters = {};
        for weight in self.weights:
            self.trainable_parameters[weight.name] = weight.weights;
        for layer in self.layers[1:]:
            self.trainable_parameters[layer.name] = layer.bias;
    
    def summary( self ):
        """
        Print the summary of the network.

        args:
            None

        returns:
            None

        attributes:
            None

        raises:
            None
        """
        print( "-------" );
        print( "| Summary |" );
        print( "-------" );
        print( f"Input Layer: { self.input_layer.node_no }" );
        print( f"Hidden Layer: { self.layers[1].node_no }" );
        print( f"Output Layer: { self.output_layer.node_no }" );
    
        print( "-------" );
        print( "| Weights |" );
        print( "-------" );
        for weight_obj in self.weights:
            print( f"{weight_obj.name}: \n {weight_obj.weights} , {weight_obj.weights.shape}" );
        
        print( "-------" );
        print( "| Bias |" );
        print( "------" );
        for layer_obj in self.layers:
            if hasattr( layer_obj, 'bias' ):
                print( f"{layer_obj.name}: \n {layer_obj.bias}, {layer_obj.bias.shape}" );
            else:
                print( f"{layer_obj.name}: " );
        
        print( "---------------" );
        print( "| Hyperparameters |" );
        print( "---------------" );
        print( f"Epochs: {self.hyperparams['epochs']}" );
        print( f"Learning Rate: {self.hyperparams['lr']}" );
        print( f"Minibatch Size: {self.hyperparams['minibatch_size']}" );
        print( "---------------" );

        print( "---------------" );
        print( f"| Loss Function | : {self.loss_fn.name}" );
        print( "---------------" );
        
        print( "---------------" );
        print( f"| Optimizer | : {self.optimizer.name}" );
        print( "---------------" );

        print( "---------------" );
        print( "| Trainable Parameters | ")
        print( "---------------" );
        print( self.trainable_parameters );
    

    

    def __rshift__( self, other ):
        """
        Overwrite the right shift operator to add a layer to the network
        """
        if isinstance( other, Layer ) or isinstance( other, InputLayer ):
            self.weights.append( WeightLayer( self.layers[-1], other ) );
            self.layers.append( other );
            self.output_layer = self.layers[-1];

            self.trainable_parameters[self.weights[-1].name] = self.weights[-1].weights;
            self.trainable_parameters[other.name] = other.bias;

            return self;
    
        if isinstance( other, Loss ):
            self.loss_fn = other;
            return self;

        if isinstance( other, GradientDescent ):
            self.optimizer = other;
            return self;

        else:
            print( type( other ) );

    def output( self, inputs ):
        """
        Calculate the output of the network.

        args:
            inputs : array

        returns:
            array

        attributes:
            None

        raises:
            None
        """
        return self.output_layer.output( inputs );

    def forward( self, inputs ):
        """
        Forward pass through the network.

        args:
            inputs : array

        returns:
            None

        attributes:
            None

        raises:
            None
        """
        for i in range( len( self.weights ) ):
            pre_activation = self.weights[i].output( inputs );
            activated_weights = self.layers[i + 1].output( pre_activation );
            inputs = activated_weights;

        return activated_weights;

    def predict( self, X ):
        """
        Predict the output of the network.

        args:
            X : array

        returns:
            array

        attributes:
            None

        raises:
            None
        """
        return self.forward( X );

    def train( self, X, y ):
        """
        Train the network.

        args:
            X : array
            y : array
            epochs : int
            lr : float

        returns:
            None

        attributes:
            None

        raises:
            None
        """
        for epoch in trange( self.hyperparams['epochs'] ):
            # forward pass
            logits = self.forward( X );

            # calculate the loss
            fwd_loss = self.loss_fn( logits, y, deriv=False );
            print( f"Epoch: { epoch + 1 }, Loss: { fwd_loss }" );


            # backward pass
            #trainable_parameters = self.optimizer.minimize( logits, self.trainable_parameters, self.loss_fn );
            
            # update the weights and biases
            #self.trainable_parameters = trainable_parameters;
            #for index in range( len ( self.weights ) ):
                #self.weights[index].weights = self.trainable_parameters['weights'][index][1];
                #self.layers[index + 1].bias = self.trainable_parameters['biases'][index][1];
                

In [42]:
params = {
    # model hyperparameters
    "input_units" : 2,
    "hidden_units" : 2,
    "output_units" : 1,

    # optimizer hyperparameters
    "lr" : 0.01,

    # training hyperparameters
    "epochs" : 1,
    "minibatch_size" : 4
}

In [43]:
training, testing = load_data( "xor" );
x_train, y_train = training[0], training[1];
x_test, y_test = testing[0], testing[1];

In [44]:
print( f"Training set: \n {x_train}, {x_train.shape}" );
print( f"Labels: \n {y_train}, {y_train.shape}" );

Training set: 
 [[0. 0.]
 [0. 1.]
 [1. 0.]
 [1. 1.]], (4, 2)
Labels: 
 [[0.]
 [1.]
 [1.]
 [0.]], (4, 1)


In [45]:
input_layer = InputLayer( params, "input" );
hidden_layer = Layer( params, "hidden", "sigmoid" );
output_layer = Layer( params, "output", "sigmoid" );

loss = Loss( "cross_entropy" );
optimizer = GradientDescent( params );

In [46]:
nn = input_layer >> hidden_layer >> output_layer >> loss >> optimizer;
nn.summary();

-------
| Summary |
-------
Input Layer: 2
Hidden Layer: 2
Output Layer: 1
-------
| Weights |
-------
W_input_hidden_layer: 
 [[-0.66157275 -0.40900101]
 [-0.63631169 -0.99798354]] , (2, 2)
W_hidden_output_layer: 
 [[0.30716018]
 [0.79441294]] , (2, 1)
-------
| Bias |
------
input: 
hidden: 
 [ 0.59309058 -0.02415929], (2,)
output: 
 [-0.04661841], (1,)
---------------
| Hyperparameters |
---------------
Epochs: 1
Learning Rate: 0.01
Minibatch Size: 4
---------------
---------------
| Loss Function | : cross_entropy
---------------
---------------
---------------
---------------
| Trainable Parameters | 
---------------
{'W_input_hidden_layer': array([[-0.66157275, -0.40900101],
       [-0.63631169, -0.99798354]]), 'hidden': array([ 0.59309058, -0.02415929]), 'W_hidden_output_layer': array([[0.30716018],
       [0.79441294]]), 'output': array([-0.04661841])}


In [34]:
nn.train( x_train, y_train );

100%|██████████| 1/1 [00:00<00:00, 2628.01it/s]

Epoch: 1, Loss: 0.7417447694348323





In [None]:
nn.predict( x_test );