In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Conv2D, MaxPooling2D
from tensorflow.keras.datasets import mnist
from tensorflow.keras.utils import to_categorical


In [None]:
# Fetch the MNIST train/test splits (downloaded on first use).
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Scale pixel values into [0, 1] and append a trailing channel axis so each
# image has the (28, 28, 1) shape the convolutional model takes as input.
x_train = (x_train / 255.0).reshape(-1, 28, 28, 1)
x_test = (x_test / 255.0).reshape(-1, 28, 28, 1)

# Turn the integer digit labels into 10-way one-hot vectors.
y_train = to_categorical(y_train, num_classes=10)
y_test = to_categorical(y_test, num_classes=10)
# Small CNN: one convolution + pooling stage, then a dense classifier head
# ending in a 10-way softmax.
model = Sequential()
model.add(Conv2D(filters=32, kernel_size=(3, 3), activation='relu', input_shape=(28, 28, 1)))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Flatten())
model.add(Dense(units=128, activation='relu'))
model.add(Dense(units=10, activation='softmax'))


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


In [None]:
# Adam optimizer with categorical cross-entropy, which matches the one-hot
# labels and the softmax output layer; accuracy is tracked as a metric.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Train for 5 epochs.
# NOTE(review): the test split is passed as validation data here and is also
# the split scored by model.evaluate later in the notebook.
model.fit(
    x_train,
    y_train,
    epochs=5,
    validation_data=(x_test, y_test),
)


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.src.callbacks.History at 0x79e38a4e6dd0>

In [None]:
# Score the trained model on the test split; with the compile settings above,
# evaluate returns the loss followed by the accuracy metric.
test_loss, test_acc = model.evaluate(
    x_test,
    y_test,
    verbose=2,
)
print(f'\nTest accuracy: {test_acc}')


313/313 - 2s - loss: 0.0434 - accuracy: 0.9872 - 2s/epoch - 5ms/step

Test accuracy: 0.9872000217437744


## 4-Layer Neural Network

In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import scale
import time
import numpy as np
import matplotlib.pyplot as plt
import sklearn.linear_model as lm
import seaborn as sns
from sklearn.preprocessing import scale
import sklearn.linear_model as lm
import statsmodels.formula.api as smf
from statsmodels.stats.outliers_influence import variance_inflation_factor
import statsmodels.api as sm


Steps
1. Initialization: Initialize weights and biases.
2. Forward Propagation: Compute activations for each layer.
3. Cost Function: Calculate the cost (or loss).
4. Backward Propagation: Compute gradients for weights and biases.
5. Update Weights: Adjust weights and biases using gradients.

## Step 1: Initialization
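We'll start by initializing the weights and biases for a 4-layer neural network (counting the input layer). The architecture is an input layer with 3 neurons, two hidden layers with 4 and 3 neurons respectively, and an output layer with 1 neuron. Weights are drawn from a standard normal distribution and biases start at zero.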

In [1]:
import numpy as np

# Fix the global NumPy RNG so the random weight draws below are repeatable.
np.random.seed(1)

# Layer widths: input -> hidden 1 -> hidden 2 -> output.
input_size, hidden_layer1_size, hidden_layer2_size, output_size = 3, 4, 3, 1

# Weight matrices are shaped (fan_in, fan_out) and drawn from a standard
# normal distribution. They are drawn in layer order because each draw
# advances the seeded random stream.
weights1 = np.random.standard_normal((input_size, hidden_layer1_size))
weights2 = np.random.standard_normal((hidden_layer1_size, hidden_layer2_size))
weights3 = np.random.standard_normal((hidden_layer2_size, output_size))

# Biases are zero-initialised row vectors, one entry per neuron of the layer.
biases1 = np.zeros(shape=(1, hidden_layer1_size))
biases2 = np.zeros(shape=(1, hidden_layer2_size))
biases3 = np.zeros(shape=(1, output_size))

# Activation function: Sigmoid
def sigmoid(x):
    """Apply the logistic function 1 / (1 + e^(-x)) element-wise.

    Args:
        x: A scalar or NumPy array.

    Returns:
        A value (or array of the same shape as ``x``) with every entry
        mapped into the range 0..1.
    """
    exp_neg = np.exp(-x)
    return 1 / (1 + exp_neg)

# Derivative of sigmoid function
def sigmoid_derivative(x):
    """Return ``x * (1 - x)``, the sigmoid slope expressed via its output.

    Args:
        x: The value the sigmoid has already produced (an activation), not
            the pre-activation input. This is how the backward pass in this
            notebook calls it.

    Returns:
        ``x * (1 - x)``, computed element-wise for array input.
    """
    complement = 1 - x
    return x * complement


## Step 2: Forward Propagation

In [2]:
def forward_propagation(X):
    """Run the input through the three sigmoid layers of the network.

    Reads the module-level ``weights1..3`` and ``biases1..3`` at call time.

    Args:
        X: Input matrix with one sample per row; the column count must match
            the first dimension of ``weights1``.

    Returns:
        A tuple ``(a1, a2, a3)`` holding the activations of the first hidden
        layer, the second hidden layer and the output layer.
    """
    layers = (
        (weights1, biases1),
        (weights2, biases2),
        (weights3, biases3),
    )

    activations = []
    signal = X
    for layer_weights, layer_biases in layers:
        # Affine transform followed by the sigmoid non-linearity.
        signal = sigmoid(np.dot(signal, layer_weights) + layer_biases)
        activations.append(signal)

    return tuple(activations)


## Step 3: Cost Function

In [3]:
def compute_cost(y_true, y_pred):
    """Return the summed squared error divided by the number of samples.

    Args:
        y_true: Array of target values; its first dimension is taken as the
            sample count.
        y_pred: Array of predictions, compatible in shape with ``y_true``.

    Returns:
        ``sum((y_true - y_pred) ** 2) / y_true.shape[0]``.
    """
    n_samples = y_true.shape[0]
    residuals = y_true - y_pred
    return np.sum(residuals ** 2) / n_samples


## Step 4: Backward Propagation

In [4]:
def backward_propagation(X, y_true, a1, a2, a3):
    """Back-propagate the output error and return per-layer gradients.

    Reads the module-level ``weights2`` and ``weights3``; nothing is modified.

    NOTE(review): the output-layer delta is ``a3 - y_true`` with no
    ``sigmoid_derivative(a3)`` factor. That is the form of the gradient for a
    sigmoid output with binary cross-entropy, whereas ``compute_cost`` in this
    notebook reports squared error. Confirm which loss is intended.

    Args:
        X: Input matrix that was fed to the forward pass.
        y_true: Target values; the first dimension is the sample count.
        a1: Activations of the first hidden layer.
        a2: Activations of the second hidden layer.
        a3: Activations of the output layer.

    Returns:
        The tuple ``(dw1, db1, dw2, db2, dw3, db3)`` of weight and bias
        gradients, each averaged over the samples.
    """
    n_samples = y_true.shape[0]

    def layer_gradients(layer_input, delta):
        # Average the weight and bias gradients of one layer over the batch.
        grad_w = np.dot(layer_input.T, delta) / n_samples
        grad_b = np.sum(delta, axis=0, keepdims=True) / n_samples
        return grad_w, grad_b

    # Output layer: the error signal is the raw prediction residual.
    delta3 = a3 - y_true
    dw3, db3 = layer_gradients(a2, delta3)

    # Second hidden layer: push the error back through weights3 and scale it
    # by the sigmoid slope at a2.
    delta2 = np.dot(delta3, weights3.T) * sigmoid_derivative(a2)
    dw2, db2 = layer_gradients(a1, delta2)

    # First hidden layer: same step through weights2, slope taken at a1.
    delta1 = np.dot(delta2, weights2.T) * sigmoid_derivative(a1)
    dw1, db1 = layer_gradients(X, delta1)

    return dw1, db1, dw2, db2, dw3, db3


## Step 5: Update Weights

In [5]:
def update_weights(dw1, db1, dw2, db2, dw3, db3, learning_rate):
    """Take one gradient-descent step on every weight and bias array.

    The module-level ``weights1..3`` and ``biases1..3`` are modified in place.
    This relies on them being NumPy arrays, as created in the initialisation
    cell, so that ``-=`` updates the existing array.

    Args:
        dw1, db1: Gradients for ``weights1`` and ``biases1``.
        dw2, db2: Gradients for ``weights2`` and ``biases2``.
        dw3, db3: Gradients for ``weights3`` and ``biases3``.
        learning_rate: Step size multiplying each gradient.

    Returns:
        None.
    """
    parameter_gradient_pairs = (
        (weights1, dw1),
        (biases1, db1),
        (weights2, dw2),
        (biases2, db2),
        (weights3, dw3),
        (biases3, db3),
    )
    for parameter, gradient in parameter_gradient_pairs:
        # In-place subtraction mutates the shared module-level array.
        parameter -= learning_rate * gradient


In [6]:
# Toy data set: the target is the XOR of the first two input columns; the
# third column is a constant 1.
X = np.array([
    [0, 0, 1],
    [0, 1, 1],
    [1, 0, 1],
    [1, 1, 1],
])
y = np.array([[0], [1], [1], [0]])

# Gradient-descent settings.
epochs = 10000
learning_rate = 0.1

for epoch in range(epochs):
    # Forward pass, cost for reporting, backward pass, then parameter step.
    a1, a2, a3 = forward_propagation(X)
    cost = compute_cost(y, a3)
    dw1, db1, dw2, db2, dw3, db3 = backward_propagation(X, y, a1, a2, a3)
    update_weights(dw1, db1, dw2, db2, dw3, db3, learning_rate)

    # Report progress every 1000th epoch (including epoch 0).
    if not epoch % 1000:
        print(f"Epoch {epoch}, Cost: {cost}")

# Predictions of the trained network on the training inputs.
a1, a2, a3 = forward_propagation(X)
print("Final output:")
print(a3)


Epoch 0, Cost: 0.2492376745373431
Epoch 1000, Cost: 0.2325703139496443
Epoch 2000, Cost: 0.16630849432380088
Epoch 3000, Cost: 0.09563982674886173
Epoch 4000, Cost: 0.001988603223742915
Epoch 5000, Cost: 0.0003134081520971705
Epoch 6000, Cost: 0.000108235822191065
Epoch 7000, Cost: 5.1870902575283084e-05
Epoch 8000, Cost: 2.966386544515569e-05
Epoch 9000, Cost: 1.8931653956423368e-05
Final output:
[[0.00167018]
 [0.99685038]
 [0.99739231]
 [0.00570573]]
