In [3]:
import numpy as np
import logging 
import matplotlib.pyplot as plt 
import copy

In [4]:
# logging.config()

In [5]:
def ReLU(x,leaky=False,negative_slope=None): # leaky is a flag for leaky relu, negative slope for leaky relu. 
    if leaky and negative_slope!=None:
        try:
            if negative_slope>0:
                raise ValueError("Negative slope accepts integers only.")            
            return np.where(x<0,x*negative_slope,x)
        
        except ValueError as ve:
            print(f"an unexpected error{ve}")
            
        except Exception as e:            
            print(f"An unexpected error {e}")
            raise 
    return np.maximum(0,x)


In [6]:
def gradient_ReLU(x):
    return np.where(x>0,1,0)

In [7]:
def normal_initialization(fraction,weight_size):
    fract=np.sqrt(2/fraction)
    return np.random.normal(0,fract,size=weight_size)

In [8]:
def uniform_initialization(fraction,weight_size):
    fract= np.sqrt(6/fraction)
    return np.random.uniform(-fract,fract,size=weight_size)


In [9]:
method_map={
        "uniform":uniform_initialization,
        "normal":normal_initialization
    }

In [10]:
def initialize_layer(layer_in,layer_out,activation="linear",mode="he",method="uniform",bias=True):
    """ Returns weights and biases initialized as response to input information

    Args:
        layer_in (_type_): _description_
        layer_out (_type_): _description_
        mode (str, optional): _description_. Defaults to "he".
        method (str, optional): _description_. Defaults to "uniform".

    Raises:
        ValueError: _description_
    """
    
    
    
    weights=np.zeros((layer_in,layer_out))
    if bias:
        biases=np.zeros((1,layer_out))
    
    
    try:        
        calcluation=0
        
        if mode.lower()=='random': return np.random.random(size=(layer_in,layer_out))
        
        elif mode.lower()=="he":
            calcluation=1/(np.sqrt(layer_in+layer_out))
            
        elif mode.lower()=="xavier":
            calcluation=1/(np.sqrt(layer_in))
            
        else:
            raise ValueError("Only accepts 'random','he' and 'xavier' string as arguments.")

        weights=method_map[method](calcluation,(layer_in,layer_out))
        
        
       
    except Exception as e:
        print(f"Error occured: {e}")    

    
    return {"weight":weights,"bias":biases,"activation":activation}
    

In [11]:
layer=(2,3)
val=initialize_layer(2,3,method="uniform")
val['weight']


array([[-1.76644102, -2.97097569,  3.55649058],
       [-3.5330374 , -1.85551649,  2.40429865]])

In [12]:
custom_hidden_layers=[
    (8,"relu"),
    (4,"relu"),
    (2,"relu"),
    (1,"linear")
    ]
custom_X=np.array([[1,2,3],[4,5,6],[7,8,9],[10,11,12],[13,14,15]]) # 5 x 3 
custom_Y=np.array([1,2,3,4,5])


In [13]:
def build_network(hidden_layers,X):
    neural_network={}
    new_hidden=hidden_layers.copy()
    new_hidden.insert(0,(X.shape[1],"relu"))
    
    for layer in range(len(hidden_layers)):
        neural_network[f"Layer{layer}"]=initialize_layer(new_hidden[layer][0],new_hidden[layer+1][0],activation=new_hidden[layer+1][1])
    return neural_network
    

In [14]:
network=build_network(hidden_layers=custom_hidden_layers,X=custom_X)
network

{'Layer0': {'weight': array([[ 1.04116575,  2.15237706,  3.59967635, -1.76577998,  4.22794401,
           0.15562409,  2.1893795 , -3.3941601 ],
         [ 0.66064513,  1.98954149,  4.08141815,  0.15174739,  3.6854873 ,
           2.71298353, -2.64028752,  1.17554755],
         [-1.03757976, -0.74582748, -4.17602691,  3.6918322 ,  1.4927271 ,
          -0.79258595,  0.1156259 , -3.20086966]]),
  'bias': array([[0., 0., 0., 0., 0., 0., 0., 0.]]),
  'activation': 'relu'},
 'Layer1': {'weight': array([[-2.02734327,  2.82540746, -1.79601613, -1.19294662],
         [ 4.05671061, -4.27405414, -1.48752994,  3.67603107],
         [-4.14297448, -1.92521729,  3.42609232, -1.75662326],
         [ 2.3989703 , -3.57874708, -0.96990567,  4.45932537],
         [-4.43116923,  2.67143439, -0.21314367,  2.92919716],
         [ 0.83684909, -0.68345343,  3.09800708, -0.35079169],
         [ 0.70965904, -0.68028242, -3.50899395,  0.64713595],
         [-2.5728746 , -3.36415233, -2.67990382, -0.53752185]]),

In [15]:
network['Layer0']

{'weight': array([[ 1.04116575,  2.15237706,  3.59967635, -1.76577998,  4.22794401,
          0.15562409,  2.1893795 , -3.3941601 ],
        [ 0.66064513,  1.98954149,  4.08141815,  0.15174739,  3.6854873 ,
          2.71298353, -2.64028752,  1.17554755],
        [-1.03757976, -0.74582748, -4.17602691,  3.6918322 ,  1.4927271 ,
         -0.79258595,  0.1156259 , -3.20086966]]),
 'bias': array([[0., 0., 0., 0., 0., 0., 0., 0.]]),
 'activation': 'relu'}

In [16]:
def forward_pass(input_network,X):
    
    
    network=input_network.copy()
    network_values={}
    
    result=X @ network['Layer0']["weight"] + network['Layer0']["bias"]
    
    network_values['Layer0']=[result]
    
    if network['Layer0']['activation']:
        result=ReLU(result)
        
    network_values['Layer0'].append(result)
    
    network.pop('Layer0')
    
    count=1
    for layer_name in network:
        
        params = network[layer_name]
        W = params["weight"]
        b = params["bias"]
        result=result @ W +b
        network_values[layer_name]=[result]
        count+=1
        
        if params["activation"].lower()=="relu":
            result=ReLU(result)
            
        network_values[layer_name].append(result)
        
        print(f"Result in layer {layer_name} : {result}")
    print(f"Total calculations: {count}")    
    return network_values 

In [17]:
forward_result=forward_pass(network,X=custom_X)
forward_result

Result in layer Layer1 : [[  0.           0.           0.         103.15193737]
 [  0.           0.          14.63303934 230.26525968]
 [  0.           0.          39.15901516 355.13871941]
 [  0.           0.          63.68499098 480.01217914]
 [  0.           0.          88.2109668  604.88563886]]
Result in layer Layer2 : [[0. 0.]
 [0. 0.]
 [0. 0.]
 [0. 0.]
 [0. 0.]]
Result in layer Layer3 : [[0.]
 [0.]
 [0.]
 [0.]
 [0.]]
Total calculations: 4


{'Layer0': [array([[ -0.75028327,   3.89397759,  -0.76556808,   9.61321141,
           16.07709991,   3.20383328,  -2.74431785, -10.64567398],
         [  1.2424101 ,  14.08225078,   9.7496347 ,  15.84661027,
           44.29557514,   9.43189827,  -3.75016422, -26.90412061],
         [  3.23510346,  24.27052397,  20.26483748,  22.08000912,
           72.51405036,  15.65996326,  -4.75601059, -43.16256724],
         [  5.22779683,  34.45879716,  30.78004026,  28.31340797,
          100.73252559,  21.88802825,  -5.76185696, -59.42101388],
         [  7.22049019,  44.64707035,  41.29524304,  34.54680682,
          128.95100081,  28.11609324,  -6.76770333, -75.67946051]]),
  array([[  0.        ,   3.89397759,   0.        ,   9.61321141,
           16.07709991,   3.20383328,   0.        ,   0.        ],
         [  1.2424101 ,  14.08225078,   9.7496347 ,  15.84661027,
           44.29557514,   9.43189827,   0.        ,   0.        ],
         [  3.23510346,  24.27052397,  20.26483748,  22.0

In [18]:
def loss_function(y_true,y_pred,type='mse'):
    y_true=y_true.reshape(-1,1)
    y_pred=y_pred.reshape(-1,1)
    # print(y_true.shape,y_pred.shape)
    loss=np.sum((y_true-y_pred)**2)
    return loss/2

In [19]:
def sigmoid(Z):
    return 1/(1+np.exp(-Z))

In [20]:
def derive_ReLU(activation):
    return np.where(activation>0,1,0)


In [21]:
def derive_linear(activation):
    return activation


In [22]:
def derive_sigmoid(activation):
    return activation * (1-activation)

In [76]:
def gradient_update(network,X,y,result):
    """
    Docstring for gradient_update
    
    :param network: Network structure of our NN, needed for updating network parameter
    :param X: For Forward pass
    :param y: For calculating loss of prediction
    :param result: Contains Activation and Linear combination values for all layers in forward pass
    """
    activations=[activations for (Z,activations) in result.values()]
    activations.insert(0,X) # for final input layer calculation streamlining 
    output=activations[-1].reshape(-1,1) # final value of activations is being used as output
    y=y.reshape(-1,1)
    
    Z=[Z for (Z,activations) in result.values()]
    
    # Z, activations = map(list, zip(*result.values()))

    #dL/dZ of previous layers stored, this will be backpropagated through all layers
    # multiply by weights for getting gradients for weights, these will be default updates for biases
    linear_error_diff=[] 
    
    loss_value=loss_function(y,output) # considering MSE
    print(f"Current loss: {loss_value}")
    
    if network[f'Layer{len(network)-1}']['activation'].lower()=="relu":
        activation_grad=gradient_ReLU(Z[-1])
    else: # currently only considering linear and relu, if not relu then linear which is the same 
        activation_grad=1
        
    output_loss_diff= output - y # y_pred - y_true , MSE differential for final linear layer
    
    loss_by_Z=output_loss_diff * activation_grad
    linear_error_diff.append(loss_by_Z)
    
   
    
    
    for i in range(1,len(activations)+1):
        
        current_layer_activation=network[f'Layer{len(network)-i}']['activation'].lower()
        if current_layer_activation=='relu':
            activation_grad=gradient_ReLU(activations[-(i+1)]) #current layer activation grad
        
        forward_layer_error=linear_error_diff[0] # dl/dz for forward layer
        forward_layer_weight=network[f"Layer{len(network)-i}"]["weight"].T # Transposed the weights of forward layer
        
        print("fle",forward_layer_error.shape)
        print("flw",forward_layer_weight.shape)
        # print("flw",activation_grad)
        
        error_Z_by_layer= forward_layer_error @ forward_layer_weight * activation_grad
        print("le",error_Z_by_layer)
        #dL/dZ for current layer= dl/dZ of forward layer x weights of forward layer x relu grad/ grad of current layer activation
        # for dL/dW of this layer, multiiply this by weights of this layer,
        # fo dl/lB of this layer, multiply this by bias of this layer
        # for backprop, element wise multiplication with activations, matrix multiplications with weights 
        linear_error_diff=[error_Z_by_layer] + linear_error_diff
        # print(linear_error_diff, "le array baybe")
    # print(len(network))
    # print(len(result))
    print(error_Z_by_layer," er / layer")
    return error_Z_by_layer
    
    
    

In [77]:
gradient_update(network=network,X=custom_X,y=custom_Y,result=forward_result)

Current loss: 27.5
fle (5, 1)
flw (1, 2)
le [[ 0.59199101 -0.7339825 ]
 [ 1.18398202 -1.467965  ]
 [ 1.77597304 -2.2019475 ]
 [ 2.36796405 -2.93592999]
 [ 2.95995506 -3.66991249]]
fle (5, 2)
flw (2, 4)
le [[ 0.          0.         -0.          0.87169873]
 [ 0.          0.         -2.70978394  1.74339745]
 [ 0.          0.         -4.06467591  2.61509618]
 [ 0.          0.         -5.41956789  3.4867949 ]
 [ 0.          0.         -6.77445986  4.35849363]]
fle (5, 4)
flw (4, 8)
le [[ -0.           3.2043916   -0.           3.88718824   2.55337743
   -0.30578467   0.          -0.        ]
 [  2.78703557  10.43966793 -12.34646249  10.40261128   5.68432816
   -9.00649917   0.           0.        ]
 [  4.18055335  15.6595019  -18.51969373  15.60391693   8.52649225
  -13.50974876   0.           0.        ]
 [  5.57407114  20.87933586 -24.69292497  20.80522257  11.36865633
  -18.01299834   0.           0.        ]
 [  6.96758892  26.09916983 -30.86615622  26.00652821  14.21082041
  -22.51624

KeyError: 'Layer-1'

In [62]:
# forward_result