In [1]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

import pandas as pd
import numpy as np
from numpy import linalg as LA
import matplotlib.pyplot as plt

In [8]:
data = pd.read_csv(r"data\Regression\BivariateData\24.csv", 
                   sep=",", names=['f1', 'f2', 'f3']).sample(frac=1).reset_index(drop=True) # 10201 
data.insert(loc=0, column='1', value=[1 for i in range(len(data))])

train = data.iloc[:6120, :] # 6120
val = data.iloc[6120:8160, :].reset_index(drop=True) # 2040
test = data.iloc[8160:, :].reset_index(drop=True) # 2041

In [10]:
class twoL_nn:
    
    def __init__(self, layer_sizes, learning_rate, num_epochs):
        self.input_size = layer_sizes[0]
        self.h1 = layer_sizes[1]
        self.h2 = layer_sizes[2]
        self.k = layer_sizes[-1]
        self.params = self.initialize_params()
        self.learning_rate = learning_rate
        self.num_epochs = num_epochs
        
    def sigmoid(self, z):
        return 1/(1 + np.exp(-z))

    def loss(self, y, s):
        return np.square(y - s)/2 

    def initialize_params(self):
        params = dict()
        params['W1'] = np.random.randn(self.h1, self.input_size)
        params['W2'] = np.random.randn(self.h2, self.h1+1)
        params['W3'] = np.random.randn(self.k, self.h2+1)
        return params

    def forward(self, x):
        W1, W2, W3 = self.params['W1'], self.params['W2'], self.params['W3']
        cache = dict()
        cache['A1'] = np.dot(W1, x)
        cache['H1'] = self.sigmoid(cache['A1'])
        cache['H1'] = np.append([[1]], cache['H1'], axis=0)
        cache['A2'] = np.dot(W2, cache['H1'])
        cache['H2'] = self.sigmoid(cache['A2'])
        cache['H2'] = np.append([[1]], cache['H2'], axis=0)
        cache['A3'] = np.dot(W3, cache['H2'])
        cache['H3'] = cache['A3']
        return cache['H3'], cache

    def backward(self, x, y):
        s, cache = self.forward(x)
        
        dell3 = (y - s) * 1 # k x 1
        dW3 = self.learning_rate * np.dot(dell3, cache['H2'].T) # k x j+1
        self.params['W3'] = self.params['W3'] + dW3 # k x j+1

        dell2 = np.dot(self.params['W3'][:, :-1].T, dell3) * cache['H2'][1:] * (1 - cache['H2'][1:]) # j x 1
        dW2 = self.learning_rate * np.dot(dell2, cache['H1'].T) # j x i+1
        self.params['W2'] = self.params['W2'] + dW2 # j x i+1
        
        dell1 = np.dot(self.params['W2'][:, :-1].T, dell2) * cache['H1'][1:] * (1 - cache['H1'][1:]) # i x 1
        dW1 = self.learning_rate * np.dot(dell1, x.T) # i x m
        self.params['W1'] = self.params['W1'] + dW1 # i x m
        
        return self.loss(y, s)

    def epoch(self, train):
        E = []
        for i in range(train.shape[0]):
#             y = np.zeros((self.k, 1))
#             y[train['y'][i]][0] = 1
            y = train.iloc[i, -1]
            x = np.array(train.iloc[i, :-1]).reshape((self.input_size, 1))
            e = self.backward(x, y)
            E.append(e)

        return np.mean(E)

    def fit(self, train):
        history = []
        for i in range(1, self.num_epochs + 1):
            E = self.epoch(train)
            history.append(E)
        return history
    
    def predict(self, test):
        preds = np.zeros(test.shape[0])
        for i in range(test.shape[0]):
            pred, _ = self.forward(np.array(test.iloc[i, :-1]).reshape((self.input_size, 1)))
            preds[i] = pred
        return preds

In [11]:
input_size = 3
layer_sizes = [3, 8, 8, 1]
learning_rate, num_epochs = 0.1, 20 
model = twoL_nn(layer_sizes, learning_rate, num_epochs)

history = model.fit(train)

In [22]:
p = model.predict(test)

from sklearn.metrics import mean_squared_error
mean_squared_error(np.array(test['f3']), p)

0.11326311879249697