# Successive Convex Approximation

In [2]:

#Two different types of the python program.
#1-Training without succesive batch.
#2-Training with successive batch. 




In [3]:
# Random selection Dataset generation for both program

import numpy as np
from random import random
inputs = np.array([[random()/2 for i in range(2)] for j in range(1000)])
inputs[:50]

array([[0.3703511 , 0.47352163],
       [0.03080801, 0.28094676],
       [0.34556038, 0.00361325],
       [0.24914238, 0.35910704],
       [0.2490735 , 0.27251771],
       [0.21478062, 0.47246284],
       [0.49179122, 0.00746667],
       [0.21584044, 0.24365182],
       [0.3628567 , 0.35567743],
       [0.34362665, 0.01648451],
       [0.211226  , 0.13968168],
       [0.47604128, 0.30203129],
       [0.45928186, 0.14746859],
       [0.02900853, 0.20056201],
       [0.49196311, 0.33029864],
       [0.38040862, 0.18629717],
       [0.23143768, 0.44584104],
       [0.40152478, 0.37383465],
       [0.26334988, 0.31957479],
       [0.41483768, 0.28644245],
       [0.06785831, 0.25617856],
       [0.21668468, 0.22248738],
       [0.2215131 , 0.47371242],
       [0.10130332, 0.25744155],
       [0.39926845, 0.17975317],
       [0.37922717, 0.27359546],
       [0.01657946, 0.36368681],
       [0.37706797, 0.14695061],
       [0.3043554 , 0.2358366 ],
       [0.16008393, 0.16985629],
       [0.

In [4]:
# generating random weights for both the programs
layers = [2,7,1]
weight = []
for i in range(len(layers)-1):
    w = np.random.randn(layers[i] , layers[i+1] )
    #w = np.array([[random()/2 for _ in range(layers[i+1])] for _ in range(layers[i])])
    weight.append(w)
weight

[array([[-0.7586089 , -0.42083518, -1.27812552,  0.94597251, -0.88909951,
         -0.15487099, -0.10478637],
        [ 1.08443659, -0.23778969, -1.09608714, -0.87859069,  0.0367101 ,
         -0.74550382,  0.64938824]]), array([[ 0.57068241],
        [-0.17589568],
        [ 0.87764973],
        [ 0.37338807],
        [ 0.71101414],
        [-0.69368014],
        [-0.08083871]])]

In [5]:
'''
Stochastic Training of Neural Networks without
Successive Batch
'''
import numpy as np
from random import random

class MLP():

    def __init__(self, num_inputs=3, num_hidden=[3,5], num_outputs=3, weight = 0):

        self.num_inputs = num_inputs
        self.num_hidden = num_hidden
        self.num_outputs = num_outputs
        self.weights = weight
        layers = [self.num_inputs] + self.num_hidden + [self.num_outputs]
        
        '''
        weights = []
        for i in range(len(layers)-1):
            w = np.random.randn(layers[i] , layers[i+1] )
            weights.append(w)
        self.weights = weights
        # print("weights: ", weights)
        '''
        activations = []
        for i in range(len(layers)):
            a = np.zeros(layers[i])
            activations.append(a)
        self.activations = activations
        # print("activations: ", activations)

        derivatives = []
        for i in range(len(layers)-1):
            d = np.zeros((layers[i], layers[i+1]))
            derivatives.append(d)
        self.derivatives = derivatives
        # print("derivatives: ", derivatives)

    def forward(self, inputs):
        
        activations = inputs
        self.activations[0] = activations
        for i, w in enumerate(self.weights):
            net_inputs = np.dot(activations, w)
            # print("net inputs:", net_inputs)
            activations = self.tanh(net_inputs)
            self.activations[i+1] = activations           
            # print("activations: {} ".format(activations))
            # print("final activations:", activations)
        return activations


    def back_prop(self, error):
        for i in reversed(range(len(self.derivatives))):
            activations = self.activations[i+1]
            #print("activations:", activations)
            # delta:= error.dedrivative_of_activation_function
            delta = error * self.tanh_derivative(activations)
            #print("delta: ", delta)
            delta_reshaped = delta.reshape(delta.shape[0], -1).T
            #print("delta reshaped:" , delta_reshaped)
            current_activations = self.activations[i]
            current_activations_reshaped = current_activations.reshape(current_activations.shape[0],-1)
            #print("current activations:", current_activations)
            #print("current activations reshaped", current_activations_reshaped)
            self.derivatives[i] = np.dot(current_activations_reshaped, delta_reshaped)
            # error := error. derivative_of_activation_function.weights
            # delta := error. derivative_of_activation_function
            # error := delta.weights
            error = np.dot(delta, self.weights[i].T)
            #print(error)
            #print("derivatives:", self.derivatives[i])
        return error

    def training(self, inputs, targets, epochs, learning_rate):
        for i in range(epochs):
            sum_error = 0
            for input, target in zip(inputs, targets):

                output = self.forward(input)
                error = output - target
                self.back_prop(error)
                self.gradient_descent(learning_rate)
                sum_error += self.mean_square_error(target, output)
            print("Error: {} at Epoch {}".format(sum_error / len(inputs), i+1))

    def mean_square_error(self, target, output):
        return np.average((output - target)**2)

    def gradient_descent(self, learning_rate):
        for i in range(len(self.weights)):
            weights = self.weights[i]
            derivatives = self.derivatives[i]
            weights -= derivatives * learning_rate 


    def tanh(self, x):
        return (np.exp(x)-np.exp(-x)) / (np.exp(x)+np.exp(-x))

    def tanh_derivative(self, x):
        return (1-x**2)
   

if __name__ == "__main__":
    mlp = MLP(2,[7],1, weight)
    #inputs = np.random.rand(mlp.num_inputs)
    #inputs = np.array([1, 2, 3])
    #inputs = np.array([[random()/2 for _ in range(2)] for _ in range(1000)])
    targets = np.array([[i[0] + i[1]] for i in inputs])
    mlp.training(inputs, targets, 100, .8)
    pre_input = np.array([0.3, 0.4])
    prediction = mlp.forward(pre_input)
    #targets = np.array([6])
    print("Withoud Batch Our Neural Network Shows: {} + {} = {}".format(pre_input[0], pre_input[1], prediction[0]))
    #print("the network input is: {}" .format(inputs)) 
    #print("the network output is: {}" .format(outputs)) 
    


Error: 0.004171230356743799 at Epoch 1
Error: 0.0016566752242379024 at Epoch 2
Error: 0.0010282145873621205 at Epoch 3
Error: 0.0005474652030198103 at Epoch 4
Error: 0.0004079193617664748 at Epoch 5
Error: 0.00037905336908095657 at Epoch 6
Error: 0.0003661384661497067 at Epoch 7
Error: 0.0003571742852962757 at Epoch 8
Error: 0.0003503181903758817 at Epoch 9
Error: 0.00034482895105850017 at Epoch 10
Error: 0.00034027738233582977 at Epoch 11
Error: 0.00033639496391375286 at Epoch 12
Error: 0.0003330068683947594 at Epoch 13
Error: 0.0003299954404675235 at Epoch 14
Error: 0.00032727914443663836 at Epoch 15
Error: 0.00032479996577517204 at Epoch 16
Error: 0.00032251562191875824 at Epoch 17
Error: 0.0003203946057134842 at Epoch 18
Error: 0.0003184129501152662 at Epoch 19
Error: 0.00031655206817461277 at Epoch 20
Error: 0.0003147972814818136 at Epoch 21
Error: 0.0003131367991629749 at Epoch 22
Error: 0.0003115609975730061 at Epoch 23
Error: 0.0003100619042568179 at Epoch 24
Error: 0.000308632

In [6]:
pre_input = np.array([0.3, 0.5])
prediction = mlp.forward(pre_input)
    #targets = np.array([6])
print("Without Batch Our Neural Network Shoews: {} + {} = {}".format(pre_input[0], pre_input[1], prediction[0]))

Without Batch Our Neural Network Shoews: 0.3 + 0.5 = 0.7931273966326561


In [7]:
'''
Stochastic Training of Neural Networks via
Successive Convex Approximation
'''
import numpy as np
from random import random
from matplotlib import pyplot as plt
%matplotlib inline

class MLP_B():

    def __init__(self, num_inputs=3, num_hidden=[3,5], num_outputs=3, weight = 0):

        self.num_inputs = num_inputs
        self.num_hidden = num_hidden
        self.num_outputs = num_outputs
        self.weights = weight
        layers = [self.num_inputs] + self.num_hidden + [self.num_outputs]
        
        '''
        weights = []
        for i in range(len(layers)-1):
            w = np.random.randn(layers[i] , layers[i+1] )
            weights.append(w)
        self.weights = weights
        # print("weights: ", weights)
        '''
        
        activations = []
        for i in range(len(layers)):
            a = np.zeros(layers[i])
            activations.append(a)
        self.activations = activations
        # print("activations: ", activations)

        derivatives = []
        for i in range(len(layers)-1):
            d = np.zeros((layers[i], layers[i+1]))
            derivatives.append(d)
        self.derivatives = derivatives
        # print("derivatives: ", derivatives)

    def forward(self, inputs):
        
        activations = inputs
        self.activations[0] = activations
        for i, w in enumerate(self.weights):
            net_inputs = np.dot(activations, w)
            # print("net inputs:", net_inputs)
            activations = self.tanh(net_inputs)
            self.activations[i+1] = activations           
            # print("activations: {} ".format(activations))
            # print("final activations:", activations)
        return activations


    def back_prop(self, error):
        for i in reversed(range(len(self.derivatives))):
            activations = self.activations[i+1]
            #print("activations:", activations)
            # delta:= error.dedrivative_of_activation_function
            delta = error * self.tanh_derivative(activations)
            #print("delta: ", delta)
            delta_reshaped = delta.reshape(delta.shape[0], -1).T
            #print("delta reshaped:" , delta_reshaped)
            current_activations = self.activations[i]
            current_activations_reshaped = current_activations.reshape(current_activations.shape[0],-1)
            #print("current activations:", current_activations)
            #print("current activations reshaped", current_activations_reshaped)
            self.derivatives[i] = np.dot(current_activations_reshaped, delta_reshaped)
            # error := error. derivative_of_activation_function.weights
            # delta := error. derivative_of_activation_function
            # error := delta.weights
            error = np.dot(delta, self.weights[i].T)
            #print(error)
            #print("derivatives:", self.derivatives[i])
        return error

    def training(self, inputs, targets, epochs, learning_rate):
        for i in range(epochs):
            #sum_error = 0
            batch_size = 100
            for step in range(int(len(inputs)/batch_size)):
                sum_error = 0
                first_index = 0
                last_index = batch_size
                batch_input = inputs[first_index: last_index]
                batch_target = targets[first_index: last_index]

                for input, target in zip(batch_input, batch_target):
                    output = self.forward(input)
                    error = output - target
                    self.back_prop(error)
                    self.gradient_descent(learning_rate)
                    sum_error += self.mean_square_error(target, output)
                    
                first_index = last_index
                last_index += batch_size 
                #print("Error: {} at batch {}".format(sum_error / batch_size, step+1))
               
                
  
            print("Error: {} at Epoch {}".format(sum_error / len(inputs), i+1))
            #plt.plot(sum_error)

    def mean_square_error(self, target, output):
        return np.average((output - target)**2)

    def gradient_descent(self, learning_rate):
        for i in range(len(self.weights)):
            weights = self.weights[i]
            derivatives = self.derivatives[i]
            weights -= derivatives * learning_rate 


    def tanh(self, x):
        return (np.exp(x)-np.exp(-x)) / (np.exp(x)+np.exp(-x))

    def tanh_derivative(self, x):
        return (1-x**2)
   


if __name__ == "__main__":
    mlp_b = MLP_B(2,[7],1 ,weight )
    #inputs = np.random.rand(mlp.num_inputs)
    #inputs = np.array([1, 2, 3])
    #inputs = np.array([[random()/2 for _ in range(2)] for _ in range(1000)])
    targets = np.array([[i[0] + i[1]] for i in inputs])
    mlp_b.training(inputs, targets, 100, .8)
    pre_input = np.array([0.3, 0.4])
    prediction = mlp_b.forward(pre_input)
    #targets = np.array([6])
    print("Using Batch Our Neural network Shows that {} + {} = {}".format(pre_input[0], pre_input[1], prediction[0]))
    #print("the network input is: {}" .format(inputs)) 
    #print("the network output is: {}" .format(outputs)) 
  


Error: 4.1746689682785874e-05 at Epoch 1
Error: 4.176025738073586e-05 at Epoch 2
Error: 4.171603909105809e-05 at Epoch 3
Error: 4.167097710731389e-05 at Epoch 4
Error: 4.1630351646294155e-05 at Epoch 5
Error: 4.1593129400068155e-05 at Epoch 6
Error: 4.1558283632286634e-05 at Epoch 7
Error: 4.1525164492208936e-05 at Epoch 8
Error: 4.149337952950169e-05 at Epoch 9
Error: 4.146269043979902e-05 at Epoch 10
Error: 4.1432949115350426e-05 at Epoch 11
Error: 4.140405984578324e-05 at Epoch 12
Error: 4.137595724337825e-05 at Epoch 13
Error: 4.134859344432043e-05 at Epoch 14
Error: 4.132193074120469e-05 at Epoch 15
Error: 4.129593737654703e-05 at Epoch 16
Error: 4.127058516426323e-05 at Epoch 17
Error: 4.124584816046767e-05 at Epoch 18
Error: 4.1221701931290826e-05 at Epoch 19
Error: 4.11981231566126e-05 at Epoch 20
Error: 4.117508942007553e-05 at Epoch 21
Error: 4.115257910037448e-05 at Epoch 22
Error: 4.1130571316067135e-05 at Epoch 23
Error: 4.110904589746497e-05 at Epoch 24
Error: 4.108798337

In [8]:
pre_input = np.array([0.30,  0.3])
prediction_b = mlp_b.forward(pre_input)
    #targets = np.array([6])
print("With Batch Our Neural network Shows that {} + {} = {}".format(pre_input[0], pre_input[1], prediction_b[0]))

With Batch Our Neural network Shows that 0.3 + 0.3 = 0.6081831904754302


In [9]:
'''
Stochastic Training of Neural Networks via
Successive Convex Approximation
'''
import numpy as np
from random import random

class MLP():

    def __init__(self, num_inputs=3, num_hidden=[3,5], num_outputs=3, weight = 0):

        self.num_inputs = num_inputs
        self.num_hidden = num_hidden
        self.num_outputs = num_outputs
        self.weights = weight
        layers = [self.num_inputs] + self.num_hidden + [self.num_outputs]
        
        '''
        weights = []
        for i in range(len(layers)-1):
            w = np.random.randn(layers[i] , layers[i+1] )
            weights.append(w)
        self.weights = weights
        # print("weights: ", weights)
        '''
        activations = []
        for i in range(len(layers)):
            a = np.zeros(layers[i])
            activations.append(a)
        self.activations = activations
        # print("activations: ", activations)

        derivatives = []
        for i in range(len(layers)-1):
            d = np.zeros((layers[i], layers[i+1]))
            derivatives.append(d)
        self.derivatives = derivatives
        # print("derivatives: ", derivatives)

    def forward(self, inputs):
        
        activations = inputs
        self.activations[0] = activations
        for i, w in enumerate(self.weights):
            net_inputs = np.dot(activations, w)
            # print("net inputs:", net_inputs)
            activations = self.tanh(net_inputs)
            self.activations[i+1] = activations           
            # print("activations: {} ".format(activations))
            # print("final activations:", activations)
        return activations


    def back_prop(self, error):
        for i in reversed(range(len(self.derivatives))):
            activations = self.activations[i+1]
            #print("activations:", activations)
            # delta:= error.dedrivative_of_activation_function
            delta = error * self.tanh_derivative(activations)
            #print("delta: ", delta)
            delta_reshaped = delta.reshape(delta.shape[0], -1).T
            #print("delta reshaped:" , delta_reshaped)
            current_activations = self.activations[i]
            current_activations_reshaped = current_activations.reshape(current_activations.shape[0],-1)
            #print("current activations:", current_activations)
            #print("current activations reshaped", current_activations_reshaped)
            self.derivatives[i] = np.dot(current_activations_reshaped, delta_reshaped)
            # error := error. derivative_of_activation_function.weights
            # delta := error. derivative_of_activation_function
            # error := delta.weights
            error = np.dot(delta, self.weights[i].T)
            #print(error)
            #print("derivatives:", self.derivatives[i])
        return error

    def training(self, inputs, targets, epochs, learning_rate):
        for i in range(epochs):
            sum_error = 0
            for input, target in zip(inputs, targets):

                output = self.forward(input)
                error = output - target
                self.back_prop(error)
                self.gradient_descent(learning_rate)
                sum_error += self.mean_square_error(target, output)
            #print("Error: {} at Epoch {}".format(sum_error / len(inputs), i+1))
    
    def training_batch(self, inputs, targets, epochs, learning_rate):
        for i in range(epochs):
            #sum_error = 0
            batch_size = 100
            for step in range(int(len(inputs)/batch_size)):
                sum_error = 0
                first_index = 0
                last_index = batch_size
                batch_input = inputs[first_index: last_index]
                batch_target = targets[first_index: last_index]

                for input, target in zip(batch_input, batch_target):
                    output = self.forward(input)
                    error = output - target
                    self.back_prop(error)
                    self.gradient_descent(learning_rate)
                    sum_error += self.mean_square_error(target, output)
                    
                first_index = last_index
                last_index += batch_size 
                #print("Error: {} at batch {}".format(sum_error / batch_size, step+1))
               
                
  
            #print("Error: {} at Epoch {}".format(sum_error / len(inputs), i+1))
            #plt.plot(sum_error)

    def mean_square_error(self, target, output):
        return np.average((output - target)**2)

    def gradient_descent(self, learning_rate):
        for i in range(len(self.weights)):
            weights = self.weights[i]
            derivatives = self.derivatives[i]
            weights -= derivatives * learning_rate 


    def tanh(self, x):
        return (np.exp(x)-np.exp(-x)) / (np.exp(x)+np.exp(-x))

    def tanh_derivative(self, x):
        return (1-x**2)
   

if __name__ == "__main__":
    mlp = MLP(2,[7],1, weight)
    #inputs = np.random.rand(mlp.num_inputs)
    #inputs = np.array([1, 2, 3])
    #inputs = np.array([[random()/2 for _ in range(2)] for _ in range(1000)])
    targets = np.array([[i[0] + i[1]] for i in inputs])
    mlp.training(inputs, targets, 100, .8)
    pre_input = np.array([0.3, 0.4])
    prediction = mlp.forward(pre_input)
    #targets = np.array([6])
    print("without Batch Our Neural Network Shows: {} + {} = {}".format(pre_input[0], pre_input[1], prediction[0]))
    #print("the network input is: {}" .format(inputs)) 
    #print("the network output is: {}" .format(outputs)) 
    mlp_b = MLP(2,[7],1 ,weight )
    #inputs = np.random.rand(mlp.num_inputs)
    #inputs = np.array([1, 2, 3])
    #inputs = np.array([[random()/2 for _ in range(2)] for _ in range(1000)])
    targets = np.array([[i[0] + i[1]] for i in inputs])
    mlp_b.training_batch(inputs, targets, 100, .8)
    pre_input = np.array([0.3, 0.4])
    prediction = mlp_b.forward(pre_input)
    #targets = np.array([6])
    print("Using Batch Our Neural Network Shows: {} + {} = {}".format(pre_input[0], pre_input[1], prediction[0]))
    #print("the network input is: {}" .format(inputs)) 
    #print("the network output is: {}" .format(outputs)) 
    


without Batch Our Neural Network Shows: 0.3 + 0.4 = 0.7192884534498386
Using Batch Our Neural Network Shows: 0.3 + 0.4 = 0.7048056966314775


# Conclusion:

In [10]:
#Using Successive Batch for the traing Neural Network the Result 
#is better than the trainng Neural Network without successive Batch
#keeping all other hyperparameters same.