In [None]:
import matplotlib.pyplot as plt #library for visualizing data
%matplotlib widget 
#setting for jupyter lab
plt.rcParams['figure.figsize'] = [12, 6] #setting figure size (plots)

import pandas as pd  # (software library for data analysis and manipulation, https://pandas.pydata.org/docs/)
import numpy as np  # (software library for matrix multiplications, https://numpy.org/doc/)
import statistics as stats  # (python module for statistic calculations, https://docs.python.org/3/library/statistics.html)
import time #python time module



def sigmoid(x):
    """Logistic sigmoid ``1 / (1 + e^-x)``.

    ``x`` may be a scalar or a NumPy array; the computation goes through
    ``np.exp`` and is therefore applied element-wise to arrays.
    """
    exp_neg = np.exp(-x)
    return 1 / (1 + exp_neg)


def sigmoid_deriv(x):
    """Derivative of the sigmoid, expressed through the sigmoid's OUTPUT.

    ``x`` is expected to be an already-activated value ``a = sigmoid(z)``;
    the slope with respect to ``z`` is then ``a * (1 - a)``.

    This is the same convention ``tanh_deriv`` uses (``1 - x ** 2``), and it
    is how ``NeuralNet.fit`` calls ``activation_prime``: with the layer
    activations, not with the pre-activation sums. Applying the sigmoid a
    second time here would yield the wrong gradient during training.
    """
    return x * (1.0 - x)


def tanh(x):
    """Hyperbolic tangent activation; delegates to ``np.tanh``."""
    activated = np.tanh(x)
    return activated


def tanh_deriv(x):
    """Return ``1 - x ** 2``.

    This equals the slope of tanh only when ``x`` is itself a tanh output
    (``a = tanh(z)`` gives ``tanh'(z) = 1 - a ** 2``). ``NeuralNet.fit``
    calls it with layer activations, which matches that usage.
    """
    squared = x ** 2
    return 1 - squared


# Empty module-level dict. Nothing in the visible code fills or reads it;
# NeuralNet.__init__ selects its activation with an if/elif chain instead.
activations = {}


class NeuralNet:
    """Small fully connected feed-forward network trained by backpropagation.

    The network is described by a list of layer sizes, e.g. ``[2, 2, 1]`` for
    two inputs, one hidden layer with two nodes and one output. Training
    (``fit``) updates the weights after every single training sample.

    Weight layout: the weight matrix between layer ``i`` and ``i + 1`` has
    ``layers[i] + 1`` rows (the extra row is fed by the constant ``1`` column
    that is prepended to the input, or by the extra unit of the previous
    hidden layer). Every matrix except the last one also has one extra output
    column, so hidden activation vectors are one element longer than the
    declared node count; that extra unit presumably stands in for a bias.

    ``activation_prime`` is always called with layer OUTPUTS (post-activation
    values), never with the pre-activation sums.
    """

    # Class-level defaults kept for backward compatibility; __init__ gives
    # every instance its own lists so nothing is shared between networks.
    history = []
    layer_topology = []

    def __init__(self, layers, activation='tanh', print_model_info=True, seed=None):
        """Create the network and initialise its weights.

        Args:
            layers: sequence of layer sizes, input layer first, output last.
            activation: ``'tanh'`` (default) or ``'sigmoid'``.
            print_model_info: print a text summary of the model when True.
            seed: optional integer; when given, the initial weights are drawn
                from a private ``np.random.RandomState(seed)`` so they are
                reproducible. When None the global NumPy RNG is used, exactly
                as before.

        Raises:
            ValueError: if ``activation`` is not one of the supported names.
        """
        if activation == 'sigmoid':
            self.activation = sigmoid
            self.activation_prime = sigmoid_deriv
        elif activation == 'tanh':
            self.activation = tanh
            self.activation_prime = tanh_deriv
        else:
            # Fail early instead of leaving the instance without an activation
            # and crashing later with an AttributeError.
            raise ValueError(
                f"Unknown activation {activation!r}; expected 'sigmoid' or 'tanh'."
            )

        self.layers = layers
        self.layer_topology = list(layers)
        self.history = []

        rng = np.random if seed is None else np.random.RandomState(seed)

        # One weight matrix per pair of consecutive layers, values in
        # [-0.2, 0.2) in steps of 0.001.
        self.weights = []
        last_index = len(layers) - 2
        for i in range(len(layers) - 1):
            # every matrix but the last gets one extra output column
            additional_column = 1 if i < last_index else 0
            shape = (layers[i] + 1, layers[i + 1] + additional_column)
            self.weights.append(rng.randint(-200, 200, shape) * 0.001)

        if print_model_info:
            self.print_model_info()

    def print_model_info(self):
        """Print an ASCII sketch of the layers, the activation and the weight count."""
        print('Model: \n')

        layers = self.layers
        max_nodes = max(layers)
        for i, layer in enumerate(layers):
            diff = max_nodes - layer
            if i == 0:
                label = f'{layer} Input(s)'
            elif i == len(layers) - 1:
                label = f'{layer} Output(s)'
            else:
                label = f'{layer} Node(s)'
            # Each node is drawn two characters wide (' O'), so `diff` leading
            # spaces centre the row under the widest layer, and `diff + 5`
            # trailing spaces line all labels up in one column.
            print(' ' * diff + ' O' * layer + ' ' * (diff + 5) + label)

        print('\nActivation:', str(self.activation))

        weight_count = sum(w.shape[0] * w.shape[1] for w in self.weights)

        print(f'{len(layers)} Layers')
        print(f'{weight_count} trainable parameters.\n\n')

    def fit(self, X, y, l_rate=0.2, epochs=100000, print_freq=100):
        """Train the network with per-sample backpropagation.

        Args:
            X: 2-D array of training inputs, one row per sample.
            y: training targets, one entry/row per sample.
            l_rate: learning rate applied to every weight update.
            epochs: number of full passes over the training data.
            print_freq: print a progress line every ``print_freq`` epochs;
                ``0`` or ``None`` disables progress output.

        Returns:
            list of ``[epoch, mse, mae]`` entries, one per epoch, measured on
            the training data after that epoch. The same list is stored in
            ``self.history``.
        """
        t_0 = time.perf_counter()

        X = np.asarray(X)
        y = np.asarray(y)
        # one row of targets per sample; works for single and multi output nets
        targets = y.reshape(len(y), -1)

        ones = np.ones((X.shape[0], 1))
        X = np.hstack((ones, X))  # stack ones for bias

        history = []
        t_last_print = t_0
        epochs_since_print = 0

        for k in range(epochs):
            epochs_since_print += 1
            for i, row in enumerate(X):  # loop through every training data sample

                # FEED FORWARD: keep the output of every layer, input included
                layer_activations = [row]
                for weight_matrix in self.weights:
                    dot_value = np.matmul(layer_activations[-1], weight_matrix)
                    layer_activations.append(self.activation(dot_value))

                # 1: Output layer delta = error * slope of the activation
                nn_result = layer_activations[-1]
                error = targets[i] - nn_result
                deltas = [error * self.activation_prime(nn_result)]

                # 2: Backpropagation through the hidden layers, last to first.
                # The delta of layer la_i is the delta of the layer after it,
                # sent back through the connecting weights and scaled by the
                # activation slope at layer la_i.
                for la_i in range(len(layer_activations) - 2, 0, -1):
                    back_error = np.matmul(deltas[-1], self.weights[la_i].T)
                    deltas.append(back_error * self.activation_prime(layer_activations[la_i]))
                deltas.reverse()  # deltas[w_i] now belongs to weights[w_i]

                # 3: Gradient calculation and weight update
                for w_i in range(len(self.weights)):
                    layer_activation = np.atleast_2d(layer_activations[w_i])
                    delta = np.atleast_2d(deltas[w_i])
                    gradient = np.matmul(layer_activation.T, delta)
                    # error is (target - output), so adding moves towards the target
                    self.weights[w_i] += l_rate * gradient

            # performance control on the training data (bias column stripped,
            # predict() adds its own). Uses this instance, not a global.
            Y_pred = self.predict(X[:, 1:])
            epoch_error = Y_pred - targets
            mse = (epoch_error ** 2).mean()
            mae = np.abs(epoch_error).mean()

            history.append([k, mse, mae])

            # print output
            if print_freq and k % print_freq == 0:
                now = time.perf_counter()
                # average over the epochs that actually ran since the last line
                mean_time_delta_epochs_ms = round(((now - t_last_print) / epochs_since_print) * 1000, 4)
                print(f'Epochs: {k}  -  MSE: {round(mse,4)}  -  MAE: {round(mae,4)}  -  Mean Time per Epoch: {mean_time_delta_epochs_ms} ms')
                t_last_print = now
                epochs_since_print = 0

        self.history = history
        print(f'\nTraining finished. Time consumed: {round(time.perf_counter() - t_0, 2)} s')
        return history

    def predict(self, X):
        """Run a forward pass.

        Args:
            X: a single sample (1-D) or a 2-D array with one sample per row,
                without the bias column.

        Returns:
            2-D array with one row of network outputs per input row.
        """
        X = np.atleast_2d(X)
        prediction = np.hstack((np.ones((X.shape[0], 1)), X))

        for weight_matrix in self.weights:
            prediction = self.activation(np.matmul(prediction, weight_matrix))
        return prediction


In [None]:
# Truth tables of three logic gates: inputs x1, x2 and expected output y.
xor_gate = pd.DataFrame(
    data=[[0, 0, 0], [1, 0, 1], [0, 1, 1], [1, 1, 0]],
    columns=['x1', 'x2', 'y'],
)
and_gate = pd.DataFrame(
    data=[[1, 1, 1], [1, 0, 0], [0, 1, 0], [0, 0, 0]],
    columns=['x1', 'x2', 'y'],
)
or_gate = pd.DataFrame(
    data=[[1, 1, 1], [1, 0, 1], [0, 1, 1], [0, 0, 0]],
    columns=['x1', 'x2', 'y'],
)

# The XOR table is the one used for training: X is (4, 2), y is (4, 1).
X = xor_gate.loc[:, ['x1', 'x2']].to_numpy()
y = xor_gate.loc[:, ['y']].to_numpy()

In [None]:
# Beer/fun regression example: a single feature column and a single target
# column, both kept 2-D. Note that this rebinds X and y, replacing the gate
# data assigned in the previous cell.
beer_fun_data = pd.read_csv('./regression_example_data.csv')
X = beer_fun_data.loc[:, ['beer_cons']].to_numpy()
y = beer_fun_data.loc[:, ['fun']].to_numpy()

In [None]:
# Size the input layer from the data instead of hard-coding it: the gate data
# has two feature columns, the beer data only one, and a fixed [2, 2, 1]
# topology fails with a shape mismatch when the beer data was loaded last.
nn = NeuralNet([X.shape[1], 2, 1])
history = nn.fit(X, y, l_rate=0.1, epochs = 30000, print_freq=500)

In [None]:
# Close figures left over from earlier runs so re-running the cell does not
# pile up open figures.
plt.close('all')
history_df = pd.DataFrame(data=history, columns=['epoch', 'MSE', 'MAE'])

# MAE over the complete training run ...
history_df.plot(x='epoch', y=['MAE'])
# ... and again for the last two thirds of the epochs only.
history_df.tail(int(len(history_df) / 1.5)).plot(x='epoch', y='MAE')

In [None]:
# Evaluation grid: 1001 evenly spaced points on [0, 1] (end point included),
# shaped as a single-column matrix for predict().
X_test = np.linspace(0, 1, num=1001, endpoint=True).reshape(-1, 1)
Y = nn.predict(X_test)

# Network output on the grid, as a two-column frame.
predictions = pd.DataFrame([X_test.ravel(), Y.ravel()]).T
predictions.columns = ['beer_cons', 'fun']

# Training samples in the same two-column layout, for plotting side by side.
training_data = pd.DataFrame([X.ravel(), y.ravel()]).T
training_data.columns = ['beer_cons', 'fun']

In [None]:
# Scatter of the training samples with the network's prediction curve drawn
# on the same axes in orange.
ax = training_data.plot.scatter(x='beer_cons', y='fun')
predictions.plot.line(x='beer_cons', y='fun', ax=ax, color='orange')