# In [None]:
#necessary libraries

import tensorflow as tf
import numpy      as np

'''class MLP which contains all methods needed to train and test the performance of your model'''
class MLP:
    """Multi-layer perceptron trained on mean squared error (TensorFlow 1.x graph API).

    The class bundles the hyperparameters, the graph construction (`model`),
    mini-batch sampling (`minibatch`), evaluation (`evaluate`), the training
    loop (`train`) and a driver that ties them together (`learn`).

    Per-epoch errors are stored in `train_memory`, `valid_memory` and
    `test_memory`. These lists are created once in `__init__` and only ever
    appended to, so they accumulate across repeated `learn()` calls.
    """

    # Names accepted for the `activation` hyperparameter.
    _ACTIVATIONS = ('sigmoid', 'relu', 'tanh')

    def __init__(self,
                 input_size      = 100,   # size of one input vector
                 output_size     = 2,     # size of one output vector
                 mini_batch_size = 100,   # samples per mini-batch; 1 is plain SGD
                 layers          = None,  # hidden layer widths; None means [10, 10]
                 l_r             = 0.0001,  # base learning rate (see `_learning_rate`)
                 kp              = 0.95,  # dropout keep probability; 1 disables dropout
                 l1              = 0.001, # multiplier of the L1 penalty; 0 disables it
                 l2              = 0.001, # multiplier of the L2 penalty; 0 disables it
                 activation      = 'sigmoid'):  # one of 'sigmoid', 'relu', 'tanh'
        """Store the hyperparameters and create empty containers.

        No TensorFlow objects are created here; the graph is built by `model()`.
        """
        self.mini_batch_size = mini_batch_size
        self.kp = kp
        self.l1 = l1
        self.l2 = l2

        self.input_size = input_size
        self.output_size = output_size

        # Copy so that instances never share (or mutate) the caller's list.
        self.layers = [10, 10] if layers is None else list(layers)
        self.l_r = l_r

        self.activation = activation

        # Defaults so that train() can be used without going through learn().
        self.verbose = False
        self.sess = None

        # Network weights, biases, pre-activations (z) and layer activations (a).
        self.W = []
        self.B = []
        self.z = []
        self.a = []
        # Error history, one entry per finished epoch.
        self.train_memory = []
        self.valid_memory = []
        self.test_memory  = []

    def _activation_fn(self):
        """Return the TensorFlow op for `self.activation`.

        Raises:
            ValueError: if `self.activation` is not one of `_ACTIVATIONS`.
        """
        if self.activation not in self._ACTIVATIONS:
            raise ValueError(
                "unknown activation {!r}; expected one of {}".format(
                    self.activation, ', '.join(self._ACTIVATIONS)))
        return {'relu': tf.nn.relu,
                'sigmoid': tf.sigmoid,
                'tanh': tf.tanh}[self.activation]

    def _learning_rate(self, epoch):
        """Return the annealed learning rate for `epoch`: l_r / (10 * (epoch + 1))."""
        return self.l_r / (10 * (epoch + 1))

    def _add_layer(self, in_size, out_size):
        """Append weights, biases and the affine output of one dense layer.

        The layer reads its input from the most recent entry of `self.a`.
        """
        # Random (non-zero) weights break the symmetry between units; with
        # all-zero weights every unit of a layer receives the same gradient.
        stddev = 1.0 / max(in_size, 1) ** 0.5
        self.W.append(tf.Variable(
            tf.truncated_normal([in_size, out_size], stddev=stddev, dtype=tf.float32)))
        self.B.append(tf.Variable(tf.zeros(shape=[out_size], dtype='float32')))
        self.z.append(tf.add(tf.matmul(self.a[-1], self.W[-1]), self.B[-1]))

    def model(self):
        """Build the TensorFlow graph: placeholders, layers, losses and optimizer.

        Raises:
            ValueError: if hidden layers are requested with an unknown activation.
        """
        # Start from empty lists: after tf.reset_default_graph() any tensors
        # left over from a previous call belong to a discarded graph.
        self.W, self.B, self.z, self.a = [], [], [], []

        # Placeholders tell TensorFlow which data to expect at run time.
        self.inputs = tf.placeholder(shape=[None, self.input_size],  dtype='float32')
        self.label  = tf.placeholder(shape=[None, self.output_size], dtype='float32')
        self.lr     = tf.placeholder(tf.float32, shape=())
        # Defaults to 1.0 (no dropout) so evaluation is deterministic;
        # train() feeds self.kp explicitly.
        self.keep_prob = tf.placeholder_with_default(
            tf.constant(1.0, dtype=tf.float32), shape=())

        self.a.append(self.inputs)

        # Only resolve the activation when it is actually used.
        activate = self._activation_fn() if self.layers else None

        # Hidden layers: affine transform, activation, then dropout.
        in_size = self.input_size
        for units in self.layers:
            self._add_layer(in_size, units)
            self.a.append(tf.nn.dropout(activate(self.z[-1]), keep_prob=self.keep_prob))
            in_size = units

        # Output layer: affine transform only (linear output).
        self._add_layer(in_size, self.output_size)
        self.a.append(self.z[-1])

        # Final output prediction.
        self.yo = self.a[-1]

        # Flatten all weight matrices into one vector for the regularizers.
        weights = tf.concat([tf.reshape(tensor=w, shape=[-1]) for w in self.W], axis=0)

        # Two losses: plain MSE is what gets reported, the regularized loss is
        # what gets minimized.
        self.mse  = tf.reduce_mean(tf.squared_difference(self.yo, self.label))
        self.loss = (self.mse
                     + self.l2 * tf.reduce_mean(tf.square(weights))
                     + self.l1 * tf.reduce_mean(tf.abs(weights)))

        self.optimizer = tf.train.AdamOptimizer(self.lr).minimize(self.loss)

        # Op that initializes every variable created above.
        self.init = tf.global_variables_initializer()

    def minibatch(self, X_batch, Y_batch):
        """Return a random sub-sample of `mini_batch_size` matching rows.

        Rows are drawn with replacement (the default of `np.random.choice`);
        both arguments must support indexing with an integer array.
        """
        mask = np.random.choice(len(X_batch), self.mini_batch_size)

        mini_batch_X = X_batch[mask]
        mini_batch_Y = Y_batch[mask]

        return mini_batch_X, mini_batch_Y

    def evaluate(self, X, Y):
        """Return the mean squared error of the prediction for `X` against `Y`.

        Dropout is not applied here because `keep_prob` is left at its default.
        """
        return self.sess.run(self.mse, feed_dict={self.inputs: X, self.label: Y})

    def train(self, X_train, Y_train, X_valid, Y_valid, X_test, Y_test, epochs = 1):
        """Train the model and record train/validation/test MSE after each epoch.

        Requires `self.sess` and the graph attributes created by `model()`.

        Returns:
            The average training MSE of the last epoch that ran, or NaN when
            `epochs` is 0.
        """
        # At least one step per epoch, even if the data is smaller than a batch.
        total_batch = max(1, len(X_train) // self.mini_batch_size)
        avg_cost = float('nan')

        for epoch in range(epochs):  # one epoch = total_batch optimizer steps
            learning_rate = self._learning_rate(epoch)
            print('Epoch: {:3d}    Learning Rate: {:.10f}'.format(epoch, learning_rate))

            # Accumulated over the whole epoch, so it is reset once per epoch.
            avg_cost = 0.0

            for _ in range(total_batch):
                mini_X_batch, mini_Y_batch = self.minibatch(X_train, Y_train)

                # One optimization step; the un-regularized MSE is fetched for reporting.
                _, c = self.sess.run([self.optimizer, self.mse],
                                     feed_dict={self.lr: learning_rate,
                                                self.keep_prob: self.kp,
                                                self.inputs: mini_X_batch,
                                                self.label: mini_Y_batch})
                avg_cost += c

            avg_cost /= total_batch

            if self.verbose:  # optional per-epoch progress report
                print("Epoch: {:3d}    Train MSE: {:.8f}".format(epoch, avg_cost))

            # After each epoch, evaluate on the validation and test data.
            validation_mse = self.evaluate(X_valid, Y_valid)
            test_mse       = self.evaluate(X_test,  Y_test)

            # Keep the history so the error curves can be plotted later.
            self.train_memory.append(avg_cost)
            self.valid_memory.append(validation_mse)
            self.test_memory.append(test_mse)

            # Stop early once the training error is negligible.
            if avg_cost < 1e-7:
                break

        return avg_cost

    def learn(self,
              X_train, Y_train,
              X_valid, Y_valid,
              X_test, Y_test,
              epochs = 1, verbose = False):
        """Build a fresh graph, train on it and return the final errors.

        Returns:
            A tuple `(train_mse, validation_mse, test_mse)`.
        """
        # Make sure TensorFlow forgets everything from earlier runs.
        tf.reset_default_graph()
        self.verbose = verbose
        # Build the model and its variables.
        self.model()
        self.sess = tf.Session()
        try:
            # Run the global variables initializer.
            self.sess.run(self.init)

            train_mse      = self.train(X_train, Y_train, X_valid, Y_valid, X_test, Y_test, epochs)
            validation_mse = self.evaluate(X_valid, Y_valid)
            test_mse       = self.evaluate(X_test,  Y_test)
        finally:
            # Release the session even if training fails.
            self.sess.close()

        return train_mse, validation_mse, test_mse
    

# Example usage of the class above.
# Create the network and set its main hyperparameters.
# NOTE(review): the my_X_* / my_Y_* datasets are not defined in this part of
# the file; presumably they are prepared elsewhere - confirm before running.
mpl = MLP(
    input_size=100,
    output_size=5,
    layers=[10, 10, 10],
    l_r=0.000001,
    activation='sigmoid',
    mini_batch_size=50,
    kp=0.9,
    l1=1,
    l2=1,
)
# learn() returns three mean squared errors: train, validation and test.
train_error, validation_error, test_error = mpl.learn(
    my_X_train, my_Y_train,
    my_X_validation, my_Y_validation,
    my_X_test, my_Y_test,
    epochs=50,
    verbose=False,
)