In [31]:
import numpy as np


For deep q learning, there are a couple of differences in back prop we have to set up, but overall it's very similar to a typical NN setup

Helper Functions Set Up:

    - ReLU

    - softmax

    - derivative of ReLU

    

In [32]:
def ReLU(Z):
    """Applies ReLU"""
    return np.maximum(0, Z)

def softmax(Z):
    """Applies softmax"""
    return np.exp(Z)/np.sum(np.exp(Z))

def deriv_ReLU(Z):
    """Diffirentiates ReLU"""
    return Z > 0

NN functionality:

    - init params

    - forward prop

    - back prop

    - updating params
    

In [33]:
def init_params(nn_structure:list):
    """This function sets up initial parameters W1, b1, W2, b2, ... following the given structure"""

    #weights and biases will contain np matrices of the weights and biases for each layer such that
    #weight[n] = n'th layer weights, etc
    weights = []
    biases = []
    
    for layer_size_index in range(1, len(nn_structure)):
        
        #current layer starting at 1st
        current_layer_size = nn_structure[layer_size_index]
        #previous layer start at 0th (input)
        previous_layer_size = nn_structure[layer_size_index - 1]

        W = np.random.rand(current_layer_size, previous_layer_size) - 0.5
        b = np.random.rand(current_layer_size, 1) - 0.5

        weights.append(W)
        biases.append(b)

    return weights, biases


def forward_propogate(weights, biases ,input_layer, functions):
    """forward propogates the NN using inserted params & functions, given the input layer"""

    s = input_layer.copy() #just for easier notation
    nn_length = len(weights)

    #will be eventually returned (with X removed)
    forward_propogation_params_A = [s]
    forward_propogation_params_Z = []
    
    #forward propogates
    for i in range(nn_length):
        #Note that A_0 = s
        #Z_n = W_n @ A_(n-1) + b_n
        Z = weights[i] @ forward_propogation_params_A[-1] + biases[i]
        
        #A_n = activation function(Z_n)
        A = functions[i](Z)
        print(A)

        forward_propogation_params_Z.append(Z)
        forward_propogation_params_A.append(A)

    forward_propogation_params_A.pop(0) #removes s
    
    
    return forward_propogation_params_A, forward_propogation_params_Z


def back_propogate(forward_propogation_params_A, forward_propogation_params_Z, weights, batch_params, functions, gamma):
    """propogates backwards through the network using gradient descent"""

    #unpack batch
    s = batch_params[0]
    a = batch_params[1]
    r = batch_params[2]
    s_next = batch_params[3]


    #get the Q(s, a, theta)
    final_layer = forward_propogation_params_A[-1]
    Q_s_value = final_layer[a]

    #get the max(Q(s', a', theta-))
    Q_s_next_value = 1#np.max(forward_propogate(, s_next, functions)[0][-1]) #gets the largest entry from A_last
    
    #calculates error:
        #loss = (r + gamma * max(Q(s', a', theta-) - Q(s, a, theta))**2

    loss = 0.5 * (r + gamma * Q_s_next_value - Q_s_value)**2


    #back propogation


def update_params(weights, biases, back_propogration_weights, back_propogration_biases, alpha):
    """updates all the params in the neural netwrok"""

    nn_length = len(weights)

    
    #update everything
    for i in range(nn_length):
        #split for notation
        W = weights[i]
        b = biases[i]

        dW = back_propogration_weights[i]
        db = back_propogration_biases[i]


        #update the biases
        W -= alpha * dW
        b -= alpha * db

    return weights, biases #Since arrays are pointers, I can just return the orignial list of arrays


In [None]:
nn_structue = [3, 5, 2]

functions = [ReLU, softmax]

weights, biases = init_params(nn_structue)

X = np.array([[1, 2, 3]]).T
print(X)
forward_propogate(weights, biases,X, functions)


[1 2 3]
[[0.         0.36224339 0.         0.34682175 0.        ]
 [0.         0.46186242 0.         0.44644079 0.        ]
 [0.         0.41658598 0.         0.40116435 0.        ]
 [0.         0.04258405 0.         0.02716242 0.        ]
 [0.         0.21030402 0.         0.19488238 0.        ]]
[[0.14045283 0.1418157  0.14045283 0.1422286  0.14045283]
 [0.05869086 0.05908179 0.05869086 0.05944284 0.05869086]]


([array([[0.        , 0.36224339, 0.        , 0.34682175, 0.        ],
         [0.        , 0.46186242, 0.        , 0.44644079, 0.        ],
         [0.        , 0.41658598, 0.        , 0.40116435, 0.        ],
         [0.        , 0.04258405, 0.        , 0.02716242, 0.        ],
         [0.        , 0.21030402, 0.        , 0.19488238, 0.        ]]),
  array([[0.14045283, 0.1418157 , 0.14045283, 0.1422286 , 0.14045283],
         [0.05869086, 0.05908179, 0.05869086, 0.05944284, 0.05869086]])],
 [array([[-0.5209631 ,  0.36224339, -0.53561756,  0.34682175, -1.61161614],
         [-0.42134406,  0.46186242, -0.43599853,  0.44644079, -1.5119971 ],
         [-0.4666205 ,  0.41658598, -0.48127497,  0.40116435, -1.55727354],
         [-0.84062243,  0.04258405, -0.8552769 ,  0.02716242, -1.93127547],
         [-0.67290247,  0.21030402, -0.68755693,  0.19488238, -1.76355551]]),
  array([[ 0.3962521 ,  0.40590866,  0.3962521 ,  0.408816  ,  0.3962521 ],
         [-0.47633571, -0.46969686, -0.4