In [4]:
import numpy as np
from sklearn.model_selection import train_test_split

In [9]:
# Toy dataset: a noiseless line Y = b0 + b1 * X with X uniform on [0, 1).
b0 = 1  # intercept
b1 = 2  # slope
# A dedicated, seeded generator makes this cell produce the same data on every
# run without touching numpy's global random state.
data_rng = np.random.RandomState(0)
X = data_rng.rand(1000, 1)
Y = b0 + b1 * X
X.shape

(1000, 1)

In [48]:
# Toy dataset with error: the line Y = b0 + b1 * X plus a small additive term.
b0 = 1  # intercept
b1 = 2  # slope
# A dedicated, seeded generator makes this cell produce the same data on every
# run without touching numpy's global random state.
data_rng = np.random.RandomState(0)
X = data_rng.rand(1000, 1)
# Error term added to Y: uniform on [0, 0.001), so it is always non-negative.
Xe = 0.001 * data_rng.rand(1000, 1)
Y = b0 + b1 * X + Xe
X.shape

(1000, 1)

In [76]:
# Advanced dataset: three input features combined linearly,
# Y = b0 + X . b1 + error, with every feature weight equal to 1.
b0 = 1  # intercept
b1 = np.array([[1], [1], [1]])  # column vector of feature weights, shape (3, 1)
# A dedicated, seeded generator makes this cell produce the same data on every
# run without touching numpy's global random state.
data_rng = np.random.RandomState(0)
X = data_rng.rand(1000, 3)
# Error term added to Y: uniform on [0, 0.001), so it is always non-negative.
Xe = 0.001 * data_rng.rand(1000, 1)
Y = b0 + np.dot(X, b1) + Xe
Y.shape

(1000, 1)

In [99]:
# Non-linear dataset: the target depends on the product of the two features,
# Y = b0 + b1 * X1 * X2 + error.
b0 = 1  # intercept
b1 = 2  # weight of the X1 * X2 interaction term
# A dedicated, seeded generator makes this cell produce the same data on every
# run without touching numpy's global random state.
data_rng = np.random.RandomState(0)
X1 = data_rng.rand(1000, 1)
X2 = data_rng.rand(1000, 1)
X = np.hstack((X1, X2))  # feature matrix of shape (1000, 2)
# Error term added to Y: uniform on [0, 0.001), so it is always non-negative.
Xe = 0.001 * data_rng.rand(1000, 1)
Y = b0 + b1 * X1 * X2 + Xe
Y.shape

(1000, 1)

In [100]:
# Hold out 20% of the rows for testing; the fixed random_state keeps the split
# identical between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=42,
)

In [101]:
# Display the shape of the training features as a quick check of the split.
X_train.shape

(800, 2)

In [103]:
#activation functions and their gradient functions
def sigmoid(X):
    '''Logistic function 1 / (1 + exp(-X)), applied element-wise.'''
    denominator = 1 + np.exp(-X)
    return 1 / denominator

def sigmoid_grad(X):
    '''Derivative of the logistic function at X: s * (1 - s) with s = sigmoid(X).'''
    activated = sigmoid(X)
    return activated * (1 - activated)

def tanh(z):
    '''Hyperbolic tangent of z, applied element-wise (thin wrapper over np.tanh).'''
    squashed = np.tanh(z)
    return squashed

def tanh_grad(z):
    '''Derivative of tanh at z: 1 - tanh(z)^2, applied element-wise.'''
    squashed = np.tanh(z)
    return 1 - squashed ** 2

def ReLU(z):
    '''Rectified linear unit: values below 0 are clipped to 0, the rest pass through.'''
    lower_bound, upper_bound = 0, np.inf
    return np.clip(z, lower_bound, upper_bound)

def ReLU_grad(z):
    '''Derivative of ReLU: 1 where z is strictly positive, 0 elsewhere (integer array).'''
    positive_mask = z > 0
    return positive_mask.astype(int)

def affine(X, slope=1, intercept=0):
    '''Affine map slope * X + intercept; with the default arguments it returns X unchanged in value.'''
    scaled = slope * X
    return scaled + intercept
    
def affine_grad(X, slope=1, intercept=0):
    '''Derivative of the affine map: the constant slope, shaped like X (intercept is unused).'''
    unit = np.ones_like(X)
    return slope * unit

In [104]:
#define neural network model
class NeuralNetwork:
    '''Two-layer fully connected network trained by gradient descent on squared error.

    A constant column of ones is appended to the inputs, so ``weights1`` has
    ``input_dim + 1`` rows and the first layer can learn an intercept; the
    hidden-to-output layer has no bias term.  Both layers go through the
    module-level ``affine`` activation, which with its default arguments is the
    identity, so as written the model is linear in its inputs.
    '''

    def __init__(self, input_dim, output_dim=1, hidden_dim=4, lr=0.005, seed=None):
        '''Create the weight matrices (uniform on [0, 1)) and store the learning rate.

        input_dim  -- number of input features (the bias column is added internally)
        output_dim -- number of output values per sample
        hidden_dim -- number of hidden units
        lr         -- step size applied to the gradient summed over the batch
        seed       -- optional seed for reproducible weights; when None the
                      global numpy random state is used
        '''
        # np.random (module) and RandomState (instance) both expose rand(), so the
        # unseeded path keeps drawing from the global stream exactly as before.
        rng = np.random if seed is None else np.random.RandomState(seed)
        #init weights
        self.weights1 = rng.rand(input_dim + 1, hidden_dim)
        self.weights2 = rng.rand(hidden_dim, output_dim)
        #set learning rate
        self.lr = lr

    def print_w(self):
        '''print weight to inspect the current values of network'''
        print('print_weights ------------>')
        print(self.weights1)
        print(self.weights2)

    @staticmethod
    def _with_bias(X):
        '''Return X with a trailing column of ones (the bias input of the first layer).'''
        return np.hstack((X, np.ones((X.shape[0], 1))))

    @staticmethod
    def _align_targets(Y, output):
        '''Reshape Y to the prediction shape when both hold the same number of elements.

        Without this, 1-D targets of shape (n,) broadcast against (n, 1)
        predictions into an (n, n) matrix instead of an element-wise difference.
        '''
        Y = np.asarray(Y)
        if Y.shape != output.shape and Y.size == output.size:
            Y = Y.reshape(output.shape)
        return Y

    def _forward(self, X):
        '''Compute (hidden activations, output) for X without storing anything on self.'''
        hidden = affine(np.dot(self._with_bias(X), self.weights1))
        output = affine(np.dot(hidden, self.weights2))
        return hidden, output

    def feedforward(self, X):
        '''Run the forward pass and cache ``self.layer1`` and ``self.output`` for backprop.'''
        self.layer1, self.output = self._forward(X)

    def backprop(self, X, Y):
        '''Take one gradient-descent step on the summed squared error.

        Uses the activations cached by the most recent ``feedforward`` call, so
        ``feedforward(X)`` must be called with the same X first.
        '''
        X = self._with_bias(X)
        Y = self._align_targets(Y, self.output)

        # Chain rule.  Each delta is the NEGATIVE gradient of the squared error with
        # respect to a layer's pre-activation, which is why the weights are updated
        # with "+=" below.
        delta_output = 2 * (Y - self.output) * affine_grad(np.dot(self.layer1, self.weights2))
        delta_hidden = np.dot(delta_output, self.weights2.T) * affine_grad(np.dot(X, self.weights1))

        d_weights2 = np.dot(self.layer1.T, delta_output)
        d_weights1 = np.dot(X.T, delta_hidden)

        # Both gradients were computed from the pre-update weights; apply them together.
        self.weights1 += d_weights1 * self.lr
        self.weights2 += d_weights2 * self.lr

    def test(self, X):
        '''get predicted values for any input data'''
        return self._forward(X)[1]

    def train(self, X, Y, num_train_iterations, log_every=100):
        '''train model with X and Y for num_train_iterations times

        Every iteration is one full-batch forward pass followed by one weight
        update.  The MSE of the forward pass is printed every ``log_every``
        iterations; pass 0 or None to silence the progress output.
        '''
        print('training  ---------------->')
        for iteration in range(num_train_iterations):
            self.feedforward(X)
            targets = self._align_targets(Y, self.output)
            self.backprop(X, targets)
            #print interim MSE (measured before this iteration's weight update)
            if log_every and iteration % log_every == 0:
                mse = np.mean((self.output - targets) ** 2)
                print("Epoch ", iteration, "MSE: ", mse)
                

In [105]:
batch_size = 4

# Build the network; the output dimension is fixed at 1.
neural_network = NeuralNetwork(X_train.shape[1], 1, lr=1e-3)

# NOTE(review): every mini-batch is fitted for 500 iterations before the next
# batch is visited, and the weights and the test MSE are printed after each
# batch, so the output grows with the number of batches.
for index in range(0, X_train.shape[0], batch_size):
    # Slicing clamps at the end of the array, so the final batch may be shorter.
    batch_X = X_train[index:index + batch_size, :]
    batch_Y = y_train[index:index + batch_size]

    # Fit the current batch, then show the resulting weights.
    neural_network.train(batch_X, batch_Y, 500)
    neural_network.print_w()

    # Score the current weights on the held-out test data.
    y_pred = neural_network.test(X_test)
    mse = np.mean((y_test - y_pred) ** 2)
    print('MSE on test data --------->')
    print(mse)

training  ---------------->
Epoch  0 MSE:  0.01566796964268804
Epoch  100 MSE:  0.008840228925821307
Epoch  200 MSE:  0.007623236479129471
Epoch  300 MSE:  0.0066419654941172
Epoch  400 MSE:  0.005849319588463215
print_weights ------------>
[[0.99926057 0.42007112 0.61570537 0.09378518]
 [0.43396907 0.47435377 0.98001919 0.7800464 ]
 [0.17345813 0.73134455 0.12088761 0.98436734]]
[[0.03037731]
 [0.46374741]
 [0.42525823]
 [0.44196917]]
MSE on test data --------->
0.05453772574525672
training  ---------------->
Epoch  0 MSE:  0.03290495328543479
Epoch  100 MSE:  0.022527252212268235
Epoch  200 MSE:  0.021223974498131022
Epoch  300 MSE:  0.020041965888636257
Epoch  400 MSE:  0.01896639826995125
print_weights ------------>
[[1.00308087 0.45290985 0.6469758  0.12296609]
 [0.43259348 0.45983194 0.96636136 0.76684734]
 [0.17101319 0.70691633 0.09784905 0.96226813]]
[[0.08091708]
 [0.44162445]
 [0.43440475]
 [0.37180922]]
MSE on test data --------->
0.03678654511239095
training  -------------

In [106]:
# Manual validation: predict a handful of hand-picked two-feature inputs
# (one row per sample) and display the predictions.
X_val = np.array([
    [1, 1],
    [2, 0],
    [3, 0],
    [4, 1],
    [1, 0],
])
y_val_pred = neural_network.test(X_val)
y_val_pred

array([[2.29250698],
       [2.11666493],
       [2.87438573],
       [4.56566937],
       [1.35894414]])