In [4]:
import numpy as np
from sklearn.model_selection import train_test_split

In [9]:
# Build a noise-free toy regression set: Y = b0 + b1 * X, with X drawn by np.random.rand.
# Seed NumPy's global generator first so the data (and any later np.random draws,
# such as the network's weight initialisation) are identical on every Restart & Run All.
np.random.seed(42)
b0 = 1  # intercept of the ground-truth line
b1 = 2  # slope of the ground-truth line
X = np.random.rand(1000,1)
Y = b0 +b1*X
X.shape

(1000, 1)

In [26]:
# Hold out 20% of the samples for testing; the fixed random_state keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=42,
)

In [27]:
# Sanity check: with test_size=0.2, 800 of the 1000 samples remain for training.
X_train.shape

(800, 1)

In [28]:
# Preview of the bias trick: appending a column of ones to the single-feature
# training matrix widens it from (800, 1) to (800, 2).
np.concatenate((X_train, np.ones(X_train.shape)), axis=1).shape

(800, 2)

In [29]:
#activation functions and their gradient functions
def sigmoid(X):
    """Logistic sigmoid applied element-wise: 1 / (1 + exp(-X))."""
    decay = np.exp(-X)
    return 1 / (1 + decay)

def sigmoid_grad(X):
    """Derivative of the logistic sigmoid at X: s * (1 - s), where s = sigmoid(X)."""
    s = sigmoid(X)
    return s * (1 - s)

def tanh(z):
    """Hyperbolic tangent activation; a thin wrapper around np.tanh."""
    activated = np.tanh(z)
    return activated

def tanh_grad(z):
    """Derivative of tanh at z, computed as 1 - tanh(z)**2."""
    t = np.tanh(z)
    return 1 - t ** 2

def ReLU(z):
    """Rectified linear unit: values below 0 are clipped to 0, the rest are kept."""
    lower, upper = 0, np.inf
    return np.clip(z, lower, upper)

def ReLU_grad(z):
    """Derivative of ReLU at z: 1 where z > 0, otherwise 0 (including at z == 0).

    Accepts NumPy arrays as well as plain Python scalars and sequences.
    The comparison goes through ``np.greater`` so the result always has
    ``astype``; a bare ``z > 0`` yields a Python ``bool`` for scalar input
    and fails for lists.

    Returns an integer array (or NumPy integer scalar) shaped like z.
    """
    return np.greater(z, 0).astype(int)

def affine(X,slope=1,intercept=0):
    """Affine activation: slope * X + intercept (the identity with the defaults)."""
    scaled = slope * X
    return scaled + intercept
    
def affine_grad(X,slope=1,intercept=0):
    """Derivative of affine() with respect to X: the constant slope, shaped like X.

    intercept is accepted so the signature mirrors affine(); it is not used.
    """
    unit = np.ones_like(X)
    return slope * unit

In [32]:
#define neural network model
class NeuralNetwork:
    '''Two-layer fully connected network trained by gradient descent on a squared-error loss.

    Data flow: input (plus one constant bias column) -> hidden layer -> output.
    Both layers use the module-level ``affine`` activation and ``affine_grad``
    derivative. Only the first layer has a bias term, supplied by the ones
    column appended to the input; the second layer is a pure matrix product.
    '''

    def __init__(self, input_dim, output_dim=1,hidden_dim = 4,lr=0.005):
        '''Create randomly initialised weights.

        input_dim:  number of feature columns in X (bias column not included).
        output_dim: number of output units.
        hidden_dim: number of hidden units.
        lr:         step size applied to every weight update.
        '''
        # +1 row holds the weights of the constant bias column appended to each input.
        self.weights1   = np.random.rand(input_dim+1,hidden_dim)
        self.weights2   = np.random.rand(hidden_dim,output_dim)
        #set learning rate
        self.lr         = lr

    @staticmethod
    def _add_bias(X):
        '''Return X with exactly one column of ones appended (the bias input).

        A single column is appended whatever the number of features, so the
        result always has input_dim + 1 columns and matches weights1.
        '''
        return np.hstack((X, np.ones((X.shape[0], 1))))

    def _forward(self, X):
        '''Run the forward pass on raw X and return (hidden activations, output).'''
        hidden = affine(np.dot(self._add_bias(X), self.weights1))
        return hidden, affine(np.dot(hidden, self.weights2))

    def print_w(self):
        '''print weight to inspect the current values of network'''
        print('print_weights ------------>')
        print(self.weights1)
        print(self.weights2)

    def feedforward(self,X):
        '''Forward pass that caches self.layer1 and self.output for backprop().'''
        self.layer1, self.output = self._forward(X)

    def backprop(self,X, Y):
        '''Take one gradient-descent step using the activations cached by feedforward().

        Y is expected to have the same shape as self.output so that the
        element-wise difference below is well defined.
        '''
        X = self._add_bias(X)
        # Negative gradient of the squared error w.r.t. each layer's pre-activation
        # (chain rule); being the negative gradient, it is *added* to the weights.
        output_delta = 2*(Y - self.output) * affine_grad(np.dot(self.layer1, self.weights2))
        hidden_delta = np.dot(output_delta, self.weights2.T) * affine_grad(np.dot(X, self.weights1))

        # Both updates are computed before either weight matrix is modified.
        d_weights2 = np.dot(self.layer1.T, output_delta)
        d_weights1 = np.dot(X.T, hidden_delta)

        self.weights1 += d_weights1*self.lr
        self.weights2 += d_weights2*self.lr

    def test(self,X):
        '''get predicted values for any input data (does not touch the cached activations)'''
        _, prediction = self._forward(X)
        return prediction

    def train(self,X,Y,num_train_iterations):
        '''train model with X and Y for num_train_iterations times

        Every 100th iteration prints the MSE of the forward pass performed
        just before that iteration's weight update.
        '''
        print('training  ---------------->')
        for iteration in range(num_train_iterations):
            self.feedforward(X)
            self.backprop(X,Y)
            #print interim MSE
            if iteration % 100 == 0:
                mse = np.mean((self.output - Y)**2)
                print("Epoch ", iteration, "MSE: ", mse)
                

In [37]:
# Number of training rows handed to the network at a time.
batch_size = 4

# One input unit per feature column of X_train, a single output unit.
neural_network = NeuralNetwork(X_train.shape[1], 1, lr=1e-3)

n_train = X_train.shape[0]
for index in range(0, n_train, batch_size):
    # Slicing clamps at the end of the array, so a shorter final batch
    # needs no explicit bounds handling.
    stop = index + batch_size
    batch_X = X_train[index:stop, :]
    batch_Y = y_train[index:stop]

    # Fit on this batch alone for 500 iterations, then show the resulting weights.
    neural_network.train(batch_X, batch_Y, 500)
    neural_network.print_w()

    # Score the current weights on the held-out split.
    y_pred = neural_network.test(X_test)
    mse = np.mean((y_test - y_pred) ** 2)
    print('MSE on test data --------->')
    print(mse)

training  ---------------->
Epoch  0 MSE:  0.0028017082943963515
Epoch  100 MSE:  0.0010522120983701465
Epoch  200 MSE:  0.0010261435009661856
Epoch  300 MSE:  0.001001134459514442
Epoch  400 MSE:  0.0009766987525295962
print_weights ------------>
[[0.66244555 0.3460568  0.08349324 0.90101862]
 [0.22791211 0.73912972 0.57069096 0.22606393]]
[[0.96994627]
 [0.46240241]
 [0.78430663]
 [0.81963768]]
MSE on test data --------->
0.014045054466575875
training  ---------------->
Epoch  0 MSE:  0.018993747090910847
Epoch  100 MSE:  0.007880261232055399
Epoch  200 MSE:  0.005898429211411289
Epoch  300 MSE:  0.004403236939497275
Epoch  400 MSE:  0.0032789021523346035
print_weights ------------>
[[0.72438039 0.37534376 0.13250812 0.95400643]
 [0.1946116  0.72355352 0.54470616 0.19739216]]
[[1.0062835 ]
 [0.46060961]
 [0.77248411]
 [0.87060648]]
MSE on test data --------->
0.0038710335388450912
training  ---------------->
Epoch  0 MSE:  0.0009418535314958677
Epoch  100 MSE:  0.00083468038590723
Ep

In [43]:
# Manual validation: inputs far larger than the np.random.rand training features,
# to compare the predictions against the generating line Y = 1 + 2 * X.
X_val = np.array([100, 200, 300, 400]).reshape(-1, 1)
y_val_pred = neural_network.test(X_val)
y_val_pred

array([[201.],
       [401.],
       [601.],
       [801.]])