# Week 2

In [12]:
import numpy as np

# Sigmoid function
def sigmoid(z):
    """Apply the logistic function 1 / (1 + e^(-z)) element-wise to ``z``.

    ``z`` may be a scalar or a NumPy array; the result has the same shape.
    """
    denominator = 1 + np.exp(-z)
    return 1 / denominator

# Binary cross-entropy loss function
def compute_cost(y, y_pred):
    """Binary cross-entropy between targets ``y`` and predicted probabilities ``y_pred``.

    The summed loss over all entries is divided by ``y.shape[0]`` (the number
    of examples).

    Predictions are clipped away from exactly 0 and 1 before taking the
    logarithm, so a saturated sigmoid output yields a large-but-finite cost
    instead of ``nan``/``inf``.
    """
    m = y.shape[0]  # Number of examples
    eps = 1e-15  # smallest probability allowed inside the logarithms
    y_pred = np.clip(np.asarray(y_pred, dtype=float), eps, 1 - eps)
    cost = -(1/m) * np.sum(y * np.log(y_pred) + (1 - y) * np.log(1 - y_pred))
    return cost

# Gradient descent function
def gradient_descent(X, y, weights, bias, learning_rate, epochs):
    """Train a single sigmoid neuron (logistic regression) by batch gradient descent.

    Parameters
    ----------
    X : array whose rows are examples (``X.shape[0]`` is the example count).
    y : target array, broadcast-compatible with ``sigmoid(X @ weights + bias)``.
    weights : initial weights; copied, so the caller's array is left untouched.
    bias : initial bias.
    learning_rate : step size applied to every gradient update.
    epochs : number of full-batch update steps.

    Returns
    -------
    (weights, bias, cost) where ``cost`` is the cross-entropy computed in the
    last epoch's forward pass (i.e. before that epoch's update). With
    ``epochs == 0`` it is the cost of the initial parameters.
    """
    m = X.shape[0]  # Number of examples

    # Work on a float copy: the caller's array is never modified in place, and
    # integer-typed initial weights no longer break the in-place update below.
    weights = np.array(weights, dtype=float)

    cost = None
    for _ in range(epochs):
        # Forward propagation
        z = np.dot(X, weights) + bias
        y_pred = sigmoid(z)

        # Compute the cost
        cost = compute_cost(y, y_pred)

        # Backward propagation (gradient computation)
        error = y_pred - y
        dw = (1/m) * np.dot(X.T, error)
        db = (1/m) * np.sum(error)

        # Update weights and bias (bias is rebound, never mutated in place)
        weights -= learning_rate * dw
        bias = bias - learning_rate * db

    if cost is None:
        # No epoch ran; report the cost of the untouched parameters.
        cost = compute_cost(y, sigmoid(np.dot(X, weights) + bias))

    return weights, bias, cost


### AND Gate
Built with logistic regression, i.e. a neural net with a single neuron.

In [21]:
# Input and output for AND gate
X = np.array([[0, 0],
              [0, 1],
              [1, 0],
              [1, 1]])

y = np.array([[0], [0], [0], [1]])

# Initialize weights and bias
# Seed the global RNG first so that re-running this cell always starts from
# the same random weights and therefore reproduces the same trained values.
np.random.seed(0)
weights = np.random.rand(X.shape[1], 1)  # 2x1 for 2 input features
bias = 0

# Set hyperparameters
learning_rate = 0.1
epochs = 10000

# Train the logistic regression model
weights, bias, cost = gradient_descent(X, y, weights, bias, learning_rate, epochs)

print("Trained weights:", weights)
print("Trained bias:", bias)

# Make predictions
y_pred = sigmoid(np.dot(X, weights) + bias)
y_pred = np.round(y_pred)  # Round to get 0 or 1

print("\nPredictions:")
print(y_pred)


Trained weights: [[7.41918007]
 [7.41918041]]
Trained bias: -11.301366033410076

Predictions:
[[0.]
 [0.]
 [0.]
 [1.]]


### XOR Gate 
Using a perceptron, i.e. the same single-neuron logistic regression model as for the AND gate

In [22]:
# Input and output for xor gate
X = np.array([[0, 0],
              [0, 1],
              [1, 0],
              [1, 1]])

y = np.array([[0], [1], [1], [0]])

# Initialize weights and bias
# Seed the global RNG first so that re-running this cell always starts from
# the same random weights and therefore reproduces the same trained values.
np.random.seed(0)
weights = np.random.rand(X.shape[1], 1)  # 2x1 for 2 input features
bias = 0

# Set hyperparameters
learning_rate = 0.1
epochs = 10000

# Train the logistic regression model
weights, bias, cost = gradient_descent(X, y, weights, bias, learning_rate, epochs)

print("Trained weights:", weights)
print("Trained bias:", bias)

# Make predictions
y_pred = sigmoid(np.dot(X, weights) + bias)
y_pred = np.round(y_pred)  # Round to get 0 or 1

print("\nPredictions:")
print(y_pred)


Trained weights: [[2.49330776e-16]
 [2.48742219e-16]]
Trained bias: -3.31850112596023e-16

Predictions:
[[0.]
 [0.]
 [0.]
 [0.]]


**Issue with using a perceptron for an XOR gate:**
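- XOR is not linearly separable: no single linear function $w_1 x_1 + w_2 x_2 + b$ can be positive for both (1,0) and (0,1) while being negative for both (0,0) and (1,1). (Adding the first two conditions gives $w_1 + w_2 + 2b > 0$, while adding the last two gives $w_1 + w_2 + 2b < 0$, which is a contradiction.)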
- Hence there is a need to build a hidden layer
- Intuitively, I think that the hidden layer will need only two neurons so I will start by trying that

**Using a hidden layer for an XOR gate**

- The first task is extending the functions to handle forward propagation and backpropagation through multiple layers
- Then training the updated model on the XOR gate data

In [23]:
#Writing better functions

# Sigmoid function
def sigmoid(z):
    """Apply the logistic function 1 / (1 + e^(-z)) element-wise to ``z``.

    ``z`` may be a scalar or a NumPy array; the result has the same shape.
    """
    decay = np.exp(-z)
    return 1 / (1 + decay)

# Derivative of the sigmoid function
def sigmoid_derivative(z):
    """Derivative of the sigmoid at pre-activation ``z``: s * (1 - s) with s = sigmoid(z)."""
    activation = sigmoid(z)
    return activation * (1 - activation)

# Binary cross-entropy loss function
def compute_cost(y, y_pred):
    """Binary cross-entropy between targets ``y`` and predicted probabilities ``y_pred``.

    The summed loss over all entries (all examples and all output columns) is
    divided by ``y.shape[0]``, the number of examples.

    Predictions are clipped away from exactly 0 and 1 before taking the
    logarithm, so a saturated sigmoid output yields a large-but-finite cost
    instead of ``nan``/``inf``.
    """
    m = y.shape[0]  # Number of examples
    eps = 1e-15  # smallest probability allowed inside the logarithms
    y_pred = np.clip(np.asarray(y_pred, dtype=float), eps, 1 - eps)
    cost = -(1/m) * np.sum(y * np.log(y_pred) + (1 - y) * np.log(1 - y_pred))
    return cost

# Neural network training function
def train_neural_network(X, y, hidden_neurons, output_neurons, learning_rate, epochs, seed=42):
    """Train a one-hidden-layer sigmoid network with full-batch gradient descent.

    Parameters
    ----------
    X : array of shape (m, input_neurons); one example per row.
    y : array of shape (m, output_neurons) holding the 0/1 targets.
    hidden_neurons : width of the hidden layer.
    output_neurons : number of sigmoid output units.
    learning_rate : step size applied to every gradient update.
    epochs : number of full-batch update steps.
    seed : value passed to ``np.random.seed`` before the weights are drawn
        (default 42, the value that used to be hard-coded). Pass ``None`` to
        leave the global random state untouched.

    Returns
    -------
    (W1, b1, W2, b2, cost) where ``cost`` is the cross-entropy computed in the
    last epoch's forward pass (i.e. before that epoch's update). With
    ``epochs == 0`` it is the cost of the freshly initialised network.
    """
    if seed is not None:
        np.random.seed(seed)  # For reproducibility

    input_neurons = X.shape[1]
    m = X.shape[0]  # Number of examples

    # Initialize weights and biases
    W1 = np.random.rand(input_neurons, hidden_neurons)  # Input to hidden layer
    b1 = np.zeros((1, hidden_neurons))
    W2 = np.random.rand(hidden_neurons, output_neurons)  # Hidden to output layer
    b2 = np.zeros((1, output_neurons))

    cost = None
    for _ in range(epochs):
        # Forward propagation
        Z1 = np.dot(X, W1) + b1
        A1 = sigmoid(Z1)  # Activation of hidden layer
        Z2 = np.dot(A1, W2) + b2
        A2 = sigmoid(Z2)  # Activation of output layer (predictions)

        # Compute the cost
        cost = compute_cost(y, A2)

        # Backward propagation
        # For a sigmoid output with cross-entropy loss, dL/dZ2 reduces to A2 - y.
        dZ2 = A2 - y
        dW2 = (1/m) * np.dot(A1.T, dZ2)
        db2 = (1/m) * np.sum(dZ2, axis=0, keepdims=True)

        dA1 = np.dot(dZ2, W2.T)
        # sigmoid'(Z1) = A1 * (1 - A1); reuse A1 instead of re-evaluating the sigmoid.
        dZ1 = dA1 * (A1 * (1 - A1))
        dW1 = (1/m) * np.dot(X.T, dZ1)
        db1 = (1/m) * np.sum(dZ1, axis=0, keepdims=True)

        # Update weights and biases
        W2 -= learning_rate * dW2
        b2 -= learning_rate * db2
        W1 -= learning_rate * dW1
        b1 -= learning_rate * db1

    if cost is None:
        # No epoch ran; report the cost of the initial parameters.
        A1 = sigmoid(np.dot(X, W1) + b1)
        A2 = sigmoid(np.dot(A1, W2) + b2)
        cost = compute_cost(y, A2)

    return W1, b1, W2, b2, cost


In [24]:
# Now updating model for XOR

# Hyperparameters
learning_rate = 0.1
epochs = 10000
hidden_neurons = 2  # Number of neurons in the hidden layer
output_neurons = 1  # Number of output neurons

# XOR truth table: every (x1, x2) combination as a row, and the matching target column
X = np.array([[p, q] for p in (0, 1) for q in (0, 1)])
y = np.array([0, 1, 1, 0]).reshape(-1, 1)  # Output for XOR gate

# Fit the two-layer network
W1, b1, W2, b2, cost = train_neural_network(
    X, y, hidden_neurons, output_neurons, learning_rate, epochs
)

print("Trained weights and biases:")
print("W1:", W1)
print("b1:", b1)
print("W2:", W2)
print("b2:", b2)

# Forward pass with the trained parameters
Z1 = X @ W1 + b1
A1 = sigmoid(Z1)
Z2 = A1 @ W2 + b2
A2 = sigmoid(Z2)

# Threshold the output probabilities to binary values
predictions = np.round(A2)

print("\nPredictions:")
print(predictions)

Trained weights and biases:
W1: [[4.57608671 6.51164718]
 [4.578482   6.52534109]]
b1: [[-7.00486476 -2.86347719]]
W2: [[-10.32773912]
 [  9.61540107]]
b2: [[-4.3991656]]

Predictions:
[[0.]
 [1.]
 [1.]
 [0.]]


**XOR Success**
- This has been successful. A hidden layer with two neurons successfully makes an XOR gate
- Now onto the Full Adder
### Full Adder 


In [41]:
# Full Adder input and output
# Each row of X is one input combination of the three full-adder input bits.
X = np.array([[0, 0, 0],
              [0, 0, 1],
              [0, 1, 0],
              [0, 1, 1],
              [1, 0, 0],
              [1, 0, 1],
              [1, 1, 0],
              [1, 1, 1]])

# Each row of y is (S, Cout) for the matching row of X.
y = np.array([[0, 0],  # S = 0, Cout = 0
              [1, 0],  # S = 1, Cout = 0
              [1, 0],  # S = 1, Cout = 0
              [0, 1],  # S = 0, Cout = 1
              [1, 0],  # S = 1, Cout = 0
              [0, 1],  # S = 0, Cout = 1
              [0, 1],  # S = 0, Cout = 1
              [1, 1]]) # S = 1, Cout = 1

# Set hyperparameters
hidden_neurons = 4  # Number of neurons in the hidden layer
output_neurons = 2  # Number of output neurons (S and Cout)
learning_rate = 0.1
epochs = 10000

# Train the neural network
W1, b1, W2, b2, cost = train_neural_network(X, y, hidden_neurons, output_neurons, learning_rate, epochs)

print("Trained weights and biases:")
print("W1:", W1)
print("b1:", b1)
print("W2:", W2)
print("b2:", b2)

# Make predictions
Z1 = np.dot(X, W1) + b1
A1 = sigmoid(Z1)
Z2 = np.dot(A1, W2) + b2
A2 = sigmoid(Z2)

predictions = np.round(A2)  # Round to get binary output

print("\nPredictions:")
print(predictions)


Trained weights and biases:
W1: [[4.36184396 2.08160792 2.3240974  6.60643639]
 [4.37140089 2.13188855 2.27446036 6.60566585]
 [4.39303902 2.51765735 1.96547882 6.61061256]]
b1: [[-1.88606008 -5.5973929  -5.50020541 -9.69918861]]
W2: [[  8.19534238   0.93871435]
 [  6.09788206   4.25425231]
 [  7.09584195   3.50040505]
 [-11.27013142   8.85934498]]
b2: [[-3.9591962  -6.44376329]]

Predictions:
[[0. 0.]
 [1. 0.]
 [1. 0.]
 [0. 1.]
 [1. 0.]
 [0. 1.]
 [0. 1.]
 [1. 1.]]


**Conclusions from Full Adder Instance**
- Two hidden neurons are insufficient
- Three are sufficient (note that the run saved above uses `hidden_neurons = 4`)

### Ripple Carry Adder 

In [60]:
# n bit adder
# Cascades the trained full-adder network (W1, b1, W2, b2 from the Full Adder
# cell) over the operand bits, least-significant bit first.
a = "11111"
b = "11111"
assert len(a) == len(b) and len(a) > 0, "a and b must be non-empty bit strings of equal length"
n = len(a)  # bit width, derived from the operands so it cannot drift out of sync

sum_bits = ""  # result bit string (named so the builtin `sum` is not shadowed)
ins_for_net = np.zeros((1, 3), dtype=int)  # one network input row; column 2 (carry-in) starts at 0

for i in range(n):
    ins_for_net[0, 0] = int(a[n - i - 1])   # sets the first value in input as a[]
    ins_for_net[0, 1] = int(b[n - i - 1])   # sets the second value in input as b[]
    Z1 = np.dot(ins_for_net, W1) + b1
    A1 = sigmoid(Z1)
    Z2 = np.dot(A1, W2) + b2
    A2 = sigmoid(Z2)
    predictions = np.round(A2)
    ins_for_net[0, 2] = predictions[0, 1]  # predicted Cout becomes the next stage's carry-in
    sum_bits = str(int(predictions[0, 0])) + sum_bits  # prepend the predicted S bit

# The carry out of the last stage is the most significant bit of the result.
sum_bits = str(int(predictions[0, 1])) + sum_bits
print(a, " + ", b, " = ")
print(sum_bits)

11111  +  11111  = 
111110


### Conclusions

- Neural nets work by stacking multiple layers, each made up of multiple logistic regressions
- They make use of non-linearities of logistic regressions to achieve interesting effects
- Backpropagation updates all layers simultaneously, so it makes sense to initialise the weights randomly; otherwise the neurons within a layer receive identical updates and the network behaves "symmetrically"
- I didn't make the computer "learn" a ripple carry adder, as it made more sense to take the full adder it had already learnt and cascade it