In [3]:
import numpy as np

In [2]:
#make toy dataset
b0 = 1
b1 = 2
X_train = np.random.rand(1000,1)
y_train = b0 +b1*X_train
X_train.shape

(1000, 1)

In [3]:
#make a small dataset for testing
X_test = np.random.rand(4,1)
y_test = b0 +b1*X_test

In [4]:
#activation functions and their gradient functions
def sigmoid(X):
    return 1/(1+np.exp(-X))

def sigmoid_grad(X):
    return sigmoid(X) * (1 - sigmoid(X))

def tanh(z):
    return np.tanh(z)

def tanh_grad(z):
     return 1 - np.tanh(z) ** 2

def ReLU(z):
    return np.clip(z, 0, np.inf)

def ReLU_grad(z):
    return (z > 0).astype(int)

def affine(X,slope=1,intercept=0):
     return slope * X + intercept
    
def affine_grad(X,slope=1,intercept=0):
    return slope * np.ones_like(X)

In [13]:
#define neural network model
class NeuralNetwork:
    def __init__(self, input_dim, output_dim=1,hidden_dim = 4,lr=0.005):
        #init weights
        self.weights1   = np.random.rand(input_dim,hidden_dim) 
        self.weights2   = np.random.rand(hidden_dim,output_dim)                 
        #set learning rate
        self.lr         = lr
      
    def print_w(self):
        '''print weight to inspect the current values of network'''  
        print('print_weights ------------>')
        print(self.weights1)
        print(self.weights2)
        
    def feedforward(self,X):
    
        self.layer1 = affine(np.dot(X, self.weights1))
        self.output = affine(np.dot(self.layer1, self.weights2))
        
    def backprop(self,X, Y):
        # application of the chain rule to find derivative of the loss function with respect to weights2 and weights1
        d_weights2 = np.dot(self.layer1.T, (2*(Y - self.output) * affine_grad(np.dot(self.layer1, self.weights2))))
        d_weights1 = np.dot(X.T,  \
                            (np.dot(2*(Y - self.output) * affine_grad(np.dot(self.layer1, self.weights2)), self.weights2.T)\
                             * affine_grad(np.dot(X, self.weights1))))

        # update the weights with the derivative (slope) of the loss function multiply learning rate
        self.weights1 += d_weights1*self.lr
        self.weights2 += d_weights2*self.lr
    
    def test(self,data):
        '''get predicted values for any input data'''
        hidden_layer1 = affine(np.dot(data, self.weights1))
        return affine(np.dot(hidden_layer1, self.weights2))
        
    def train(self,X,Y,num_train_iterations):
        '''train model with X and Y for num_train_iterations times'''
        print('training  ---------------->')
        for iteration in range(num_train_iterations): 
            self.feedforward(X) 
            self.backprop(X,Y)
            #print interim MSE
            if iteration % 100 == 0:
                mse = np.mean((self.output - Y)**2)
                print("Epoch ", iteration, "MSE: ", mse)
                

In [14]:
batch_size = 4

#initialize network with fixed output dim of 1
neural_network = NeuralNetwork(X_train.shape[1],1,lr=1e-3)

for index in range(0,X_train.shape[0],batch_size):
    
    
    #get batch X and Y
    batch_X=X_train[index:min(index+batch_size,X_train.shape[0]),:]
    batch_Y=y_train[index:min(index+batch_size,y_train.shape[0])]
    
    #train model with batch
    neural_network.train(batch_X,batch_Y,500)
    
    #print final state of weights
    neural_network.print_w()

    # Test the neural network with new test data. 
    #get predicted y
    y_pred = neural_network.test(X_test)
    #compare predicted y and groundtruth 
    print('predicted data ----------->')
    print(y_pred)
    print('real data ---------------->')
    print(y_test)
    #calculate MSE
    mse = np.mean((y_test - y_pred)**2)
    print('MSE on test data --------->')
    print(mse)

training  ---------------->
Epoch  0 MSE:  1.839821596888186
Epoch  100 MSE:  0.18254700516823735
Epoch  200 MSE:  0.15732665019899092
Epoch  300 MSE:  0.15720780114101435
Epoch  400 MSE:  0.15720730239542594
print_weights ------------>
[[1.07754036 1.13452736 1.21808557 0.15097247]]
[[0.82614328]
 [0.66845999]
 [1.34439369]
 [0.18575903]]
predicted data ----------->
[[1.35357535]
 [2.50213659]
 [2.59570973]
 [1.3790383 ]]
real data ---------------->
[[1.81682892]
 [2.50993998]
 [2.56640765]
 [1.8321948 ]]
MSE on test data --------->
0.10521854945462936
training  ---------------->
Epoch  0 MSE:  0.39151738468903624
Epoch  100 MSE:  0.1798944555123751
Epoch  200 MSE:  0.10873264040671071
Epoch  300 MSE:  0.08970876739938269
Epoch  400 MSE:  0.08524167063725158
print_weights ------------>
[[1.27234029 1.29774725 1.52366904 0.19282626]]
[[1.06786844]
 [0.91867499]
 [1.62640076]
 [0.22111963]]
predicted data ----------->
[[2.07132873]
 [3.82893158]
 [3.97212326]
 [2.11029378]]
real data --