In [17]:
%matplotlib inline
#without scikit learn
"""
Simple code that tests XOR, OR and AND gates with linear regression

Design matrix and the various output vectors for the different gates
"""

# import necessary packages
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from autograd import grad

#sigmoid function = activation function
def sigmoid(x):
    return 1/(1 + np.exp(-x))


def feed_forward(X):
    # weighted sum of inputs to the hidden layer
    z_h = np.matmul(X, hidden_weights) + hidden_bias
    # activation in the hidden layer
    a_h = sigmoid(z_h)
    
    # weighted sum of inputs to the output layer
    z_o = np.matmul(a_h, output_weights) + output_bias
    # softmax output
    # axis 0 holds each input and axis 1 the probabilities of each category
    exp_term = np.exp(z_o)
    probabilities = exp_term / np.sum(exp_term, axis=1, keepdims=True)
    
    # for backpropagation need activations in hidden and output layers
    return a_h, probabilities

def backpropagation(X, Y):
    a_h, probabilities = feed_forward(X)
    
    # error in the output layer
    #error_output = probabilities - Y
    # error in the hidden layer
    #error_hidden = np.matmul(error_output, output_weights.T) * a_h * (1 - a_h)
    error_output = probabilities - Y.reshape(-1, 1)  # Reshape Y to a column vector
    error_hidden = np.dot(error_output, output_weights.T) * a_h * (1 - a_h)
    
    
    # gradients for the output layer
    output_weights_gradient = np.matmul(a_h.T, error_output)
    output_bias_gradient = np.sum(error_output, axis=0)
    
    # gradient for the hidden layer
    hidden_weights_gradient = np.matmul(X.T, error_hidden)
    hidden_bias_gradient = np.sum(error_hidden, axis=0)

    return output_weights_gradient, output_bias_gradient, hidden_weights_gradient, hidden_bias_gradient

#cost function --> cross entropy for classification of binary cases
def CostCrossEntropy(target):
    
    def func(X):
        return -(1.0 / target.size) * np.sum(target * np.log(X + 10e-10))

    return func


# ensure the same random numbers appear every time
np.random.seed(0)

# Design matrix
X = np.array([ [0, 0], [0, 1], [1, 0],[1, 1]],dtype=np.float64)

#target values
# XOR 
yXOR = np.array( [ 0, 1 ,1, 0])
# AND 
yAND = np.array( [ 0, 0 ,0, 1])
# OR 
yOR = np.array( [ 0, 1 ,1, 1])


# Defining the neural network
n_inputs, n_features = X.shape
n_hidden_neurons = 2 #hidden nodes
n_categories = 1 #the number of output categories --> number of output nodes in the neural network 

n_features = 2 #number of features/input dimensions in the dataset. 

# we make the weights normally distributed using numpy.random.randn

# weights and bias in the hidden layer
hidden_weights = np.random.randn(n_features, n_hidden_neurons)
hidden_bias = np.zeros(n_hidden_neurons) + 0.01

# weights and bias in the output layer
output_weights = np.random.randn(n_hidden_neurons, n_categories)
output_bias = np.zeros(n_categories) + 0.01

output = feed_forward(X) 

#calculating the gradients needed for the backpropagation part
cost_func = CostCrossEntropy #calculate cost using cost function
cost_func_derivative = grad(cost_func(output)) #derivative of cost function with respect to the output
#using autograd to automatically compute gradients --> want to apply these gradients for weight updates during backpropagation


#XOR
XORpredictions =np.round(output[1]) #xor predictions
XORaccuracy = np.mean(XORpredictions == yXOR) #xor accuracy
print("XOR Predictions:", XORpredictions)
print("XOR Accuracy:", XORaccuracy)

#OR
ORpredtictions = np.round(output[1])
ORaccuracy = np.mean(ORpredtictions == yOR)
print("OR Predictions:", ORpredtictions)
print("OR Accuracy:", ORaccuracy)

#AND
ANDpredictions = np.round(output[1])
ANDaccuracy = np.mean(ANDpredictions == yAND)
print("AND Predictions:", ANDpredictions)
print("AND Accuracy:", ANDaccuracy)



eta = 0.01 #learning rate
lmbd = 0.01 #regularization parameter
epochs_number = 100


for gate_name, y_target in [("XOR", yXOR), ("OR", yOR), ("AND", yAND)]:
    # Reset weights and biases for each gate
    hidden_weights = np.random.randn(n_features, n_hidden_neurons)
    hidden_bias = np.zeros(n_hidden_neurons) + 0.01
    output_weights = np.random.randn(n_hidden_neurons, n_categories)
    output_bias = np.zeros(n_categories) + 0.01

    for i in range(epochs_number):
        # Calculate gradients based on the current gate's target variable
        output_weights_gradient, output_bias_gradient, hidden_weights_gradient, hidden_bias_gradient = backpropagation(X, y_target)

        # Apply regularization to the gradients
        output_weights_gradient += lmbd * output_weights
        hidden_weights_gradient += lmbd * hidden_weights

        # Update weights and biases
        output_weights -= eta * output_weights_gradient
        output_bias -= eta * output_bias_gradient
        hidden_weights -= eta * hidden_weights_gradient
        hidden_bias -= eta * hidden_bias_gradient

    # Calculate and print accuracy for the current gate
    train_predictions = feed_forward(X)[1]
    train_accuracy = np.mean(np.round(train_predictions) == y_target)
    print(f"{gate_name} - Accuracy on training data: {train_accuracy:.2%}")




XOR Predictions: [[1.]
 [1.]
 [1.]
 [1.]]
XOR Accuracy: 0.5
OR Predictions: [[1.]
 [1.]
 [1.]
 [1.]]
OR Accuracy: 0.75
AND Predictions: [[1.]
 [1.]
 [1.]
 [1.]]
AND Accuracy: 0.25
XOR - Accuracy on training data: 50.00%
OR - Accuracy on training data: 75.00%
AND - Accuracy on training data: 25.00%


Backpropagation is the process of adjusting the weights of a neural network by analyzing the error rate from the previous iteration. It involves working backward from outputs to inputs to figure out how to reduce the number of errors and make a neural network more reliable. Type of supervised learning since it requires a known, desired output for each input value to caluculate the loss function gradient, which is how desired output values differ from actual output. 

An activatino function is a function that is added into an artificial NN in order to help the network lean complex patterns in the data. When comparing with a neuron-based model that is in out brains, the activatio function is at the end deciding what is to be fired to the next neuron. Takes the output signal from the previous cell and converts it into come form that can be taken as input to the next cell. 



In [None]:
# import necessary packages
import numpy as np
import matplotlib.pyplot as plt
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score
import seaborn as sns

# ensure the same random numbers appear every time
np.random.seed(0)

# Design matrix
X = np.array([ [0, 0], [0, 1], [1, 0],[1, 1]],dtype=np.float64)

# The XOR gate
yXOR = np.array( [ 0, 1 ,1, 0])
# The OR gate
yOR = np.array( [ 0, 1 ,1, 1])
# The AND gate
yAND = np.array( [ 0, 0 ,0, 1])

# Defining the neural network
n_hidden_neurons = 2

eta_vals = np.logspace(-5, 1, 7)
lmbd_vals = np.logspace(-5, 1, 7)
# store models for later use
DNN_scikit = np.zeros((len(eta_vals), len(lmbd_vals)), dtype=object)
epochs = 100

for i, eta in enumerate(eta_vals):
    for j, lmbd in enumerate(lmbd_vals):
        dnn = MLPClassifier(hidden_layer_sizes=(n_hidden_neurons), activation='logistic',
                            alpha=lmbd, learning_rate_init=eta, max_iter=epochs)
        dnn.fit(X, yXOR)
        DNN_scikit[i][j] = dnn
        print("Learning rate  = ", eta)
        print("Lambda = ", lmbd)
        print("Accuracy score on data set: ", dnn.score(X, yXOR))
        print()

sns.set()
test_accuracy = np.zeros((len(eta_vals), len(lmbd_vals)))
for i in range(len(eta_vals)):
    for j in range(len(lmbd_vals)):
        dnn = DNN_scikit[i][j]
        test_pred = dnn.predict(X)
        test_accuracy[i][j] = accuracy_score(yXOR, test_pred)

fig, ax = plt.subplots(figsize = (10, 10))
sns.heatmap(test_accuracy, annot=True, ax=ax, cmap="viridis")
ax.set_title("Test Accuracy")
ax.set_ylabel("$\eta$")
ax.set_xlabel("$\lambda$")
plt.show()