![Alt Text](https://raw.githubusercontent.com/msfasha/307304-Data-Mining/main/20242/images/header.png)

<div style="display: flex; justify-content: flex-start; align-items: center;">
   <a href="https://colab.research.google.com/github/msfasha/307307-BI-Methods/blob/main/20242-NLP-LLM/lecture%20notes/Part%202%20-%20Introduction%20to%20Large%20Language%20Models/introduction_to_neural_networks.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>
</div>

## The Perceptron

### Define the Perceptron

In [1]:
import numpy as np

class Perceptron:
    """Single-layer perceptron for binary (0/1) classification.

    The model computes ``w . x + b`` and predicts 1 when that value is
    greater than or equal to zero, otherwise 0. Training uses the classic
    perceptron rule, updating the parameters one sample at a time.

    Parameters
    ----------
    learning_rate : float, default 0.01
        Step size applied to every weight/bias correction.
    n_iterations : int, default 1000
        Maximum number of passes (epochs) over the training data.

    Attributes
    ----------
    weights : numpy.ndarray or None
        One weight per input feature; ``None`` until ``fit`` is called.
    bias : float or None
        Intercept term; ``None`` until ``fit`` is called.
    """

    def __init__(self, learning_rate=0.01, n_iterations=1000):
        self.learning_rate = learning_rate
        self.n_iterations = n_iterations
        self.weights = None
        self.bias = None

    def fit(self, X, y):
        """Learn weights and bias from labelled samples.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training inputs. Lists and tuples are accepted.
        y : array-like of shape (n_samples,) or (n_samples, 1)
            Target labels (0 or 1). Column vectors are flattened.

        Raises
        ------
        ValueError
            If ``X`` is not 2-D or ``X`` and ``y`` disagree on the number
            of samples.
        """
        X = np.asarray(X)
        # Flatten so a (n_samples, 1) label column does not turn the bias
        # into a 1-element array through broadcasting.
        y = np.ravel(y)

        if X.ndim != 2:
            raise ValueError(
                f"X must be 2-D (n_samples, n_features); got {X.ndim} dimension(s)"
            )
        n_samples, n_features = X.shape
        if y.shape[0] != n_samples:
            raise ValueError(
                f"X has {n_samples} samples but y has {y.shape[0]} labels"
            )

        # Initialize parameters
        self.weights = np.zeros(n_features)
        self.bias = 0.0

        # Training loop
        for _ in range(self.n_iterations):
            n_mistakes = 0
            for x_i, target in zip(X, y):
                linear_output = np.dot(x_i, self.weights) + self.bias
                y_predicted = 1 if linear_output >= 0 else 0

                if target != y_predicted:
                    # Perceptron update rule: move the boundary toward the
                    # misclassified sample.
                    update = self.learning_rate * (target - y_predicted)
                    self.weights += update * x_i
                    self.bias += update
                    n_mistakes += 1

            # An epoch without a single correction means every further epoch
            # would leave the parameters unchanged, so stop early.
            if n_mistakes == 0:
                break

    def predict(self, X):
        """Return the predicted class (0 or 1) for each row of ``X``.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)

        Returns
        -------
        numpy.ndarray
            Array of 0/1 predictions, one per sample.
        """
        linear_output = np.dot(np.asarray(X), self.weights) + self.bias
        return np.where(linear_output >= 0, 1, 0)


### Run The Perceptron

In [2]:

import numpy as np
from matplotlib import pyplot as plt

# Truth table of the AND gate: every combination of two binary inputs,
# labelled 1 only when both inputs are 1.
X = np.array([[a, b] for a in (0, 1) for b in (0, 1)])
y = X[:, 0] & X[:, 1]

# Train a perceptron on the four samples
perceptron = Perceptron(n_iterations=100, learning_rate=0.1)
perceptron.fit(X, y)

# Report the learned parameters and the predictions on the training inputs
for label, value in (
    ("Weights:", perceptron.weights),
    ("Bias:", perceptron.bias),
    ("Predictions:", perceptron.predict(X)),
):
    print(label, value)


Weights: [0.2 0.1]
Bias: -0.20000000000000004
Predictions: [0 0 0 1]


---

## The Multi-Layer Perceptron - MLP

### Solving the XOR Problem with a Neural Network

This code demonstrates how to build and train a simple neural network from scratch using NumPy to learn the XOR logic gate.

<div style="text-align: center;">
    <img src="https://raw.githubusercontent.com/msfasha/307307-BI-Methods/main/images/mlp.png" alt="Multi Layer Perceptron" width="600"/>
</div>

### 1. Import Required Libraries

In [3]:
import numpy as np

### 2. Define the Activation Function

In [4]:
def sigmoid(x):
    """Logistic sigmoid, ``1 / (1 + exp(-x))``, applied element-wise.

    Parameters
    ----------
    x : float or numpy.ndarray
        Pre-activation value(s).

    Returns
    -------
    float or numpy.ndarray
        Value(s) in the range [0, 1], same shape as ``x``.
    """
    # For very negative x, exp(-x) overflows to inf. The quotient still
    # evaluates to the correct limit (0.0), so only the spurious overflow
    # RuntimeWarning is silenced; the returned values are unchanged.
    with np.errstate(over="ignore"):
        return 1 / (1 + np.exp(-x))


def sigmoid_derivative(x):
    """Derivative of the sigmoid with respect to its input ``x``.

    Uses the identity ``sigmoid'(x) = sigmoid(x) * (1 - sigmoid(x))``.
    ``x`` is the pre-activation value, not the sigmoid output.
    """
    # Evaluate the sigmoid once and reuse it.
    s = sigmoid(x)
    return s * (1 - s)

### 3. Define the XOR Input and Output

In [5]:
# XOR truth table: four input pairs, one per row
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])

# Expected XOR outputs as a column vector (one row per input pair), so they
# line up with the (4, 1) output of the network
y = np.array([0, 1, 1, 0]).reshape(-1, 1)

### 4. Initialize Network Parameters (The Weights and Biases)

<div style="text-align: center;">
    <img src="https://raw.githubusercontent.com/msfasha/307307-BI-Methods/main/images/mlp.png" alt="Multi Layer Perceptron" width="600"/>
</div>

In [6]:
# Fix NumPy's global random seed so the initial parameters are reproducible.
# The order of the four np.random.uniform calls below determines which random
# numbers each parameter receives, so reordering them changes the start point.
np.random.seed(42)

# Hidden-layer weights: 2 input features (rows) x 2 hidden neurons (columns)
weights_hidden = np.random.uniform(size=(2, 2))

# 1 bias for each hidden neuron, stored as a row so it broadcasts over samples
bias_hidden = np.random.uniform(size=(1, 2)) 


# Output-layer weights: 2 hidden neurons (rows) x 1 output neuron (column)
weights_output = np.random.uniform(size=(2, 1))

# 1 bias for the single output neuron
bias_output = np.random.uniform(size=(1, 1))

# Hyperparameters read by the training loop
learning_rate = 0.1  # step size applied to every parameter update
epochs = 10000       # number of full passes over the training data

### 5. Train the Network Using Backpropagation

In [7]:
# Full-batch gradient descent: each epoch pushes every row of X through the
# network, then adjusts all weights and biases in place.
# NOTE: the parameters are created in a separate cell, so re-running only this
# cell continues training from the current weights instead of starting over.
for epoch in range(epochs):
    # Forward pass: weighted input to the hidden layer
    hidden_layer_input = np.dot(X, weights_hidden) + bias_hidden 
    
    # Activation of hidden layer
    hidden_layer_output = sigmoid(hidden_layer_input)

    # Weighted input to the output layer
    output_layer_input = np.dot(hidden_layer_output, weights_output) + bias_output
    
    # Final output of the network
    predicted_output = sigmoid(output_layer_input)

    # Backpropagation
    # Residual (target - prediction). The updates below follow the negative
    # gradient of the sum-of-squares loss 0.5 * sum((y - prediction) ** 2).
    error = y - predicted_output
    # Report progress every 1000 epochs. The value printed is the mean
    # absolute error, not the squared-error loss itself.
    if epoch % 1000 == 0:
        print(f"Epoch {epoch+1}/{epochs}, Error: {np.mean(np.abs(error))}")
    
    # Output-layer delta: residual scaled by the sigmoid slope at the
    # output pre-activation
    d_predicted_output = error * sigmoid_derivative(output_layer_input)
    
    
    # Propagate the output delta back through the output weights
    error_hidden_layer = d_predicted_output.dot(weights_output.T)
    # Hidden-layer delta: back-propagated error scaled by the sigmoid slope
    # at the hidden pre-activation
    d_hidden_layer = error_hidden_layer * sigmoid_derivative(hidden_layer_input)

    # Parameters are updated with "+=" because `error` is (target - prediction),
    # so adding the scaled deltas moves them in the loss-reducing direction.
    # Bias updates sum the deltas over all samples.
    weights_output += hidden_layer_output.T.dot(d_predicted_output) * learning_rate # update weights
    bias_output += np.sum(d_predicted_output, axis=0, keepdims=True) * learning_rate # update bias
    weights_hidden += X.T.dot(d_hidden_layer) * learning_rate # update weights
    bias_hidden += np.sum(d_hidden_layer, axis=0, keepdims=True) * learning_rate # update bias

Epoch 1/10000, Error: 0.4977550305860017
Epoch 1001/10000, Error: 0.48962844155619734
Epoch 2001/10000, Error: 0.43050559183023696
Epoch 3001/10000, Error: 0.3357263739761261
Epoch 4001/10000, Error: 0.17357496319517718
Epoch 5001/10000, Error: 0.11181272498560178
Epoch 6001/10000, Error: 0.08576413241547491
Epoch 7001/10000, Error: 0.07130866479694546
Epoch 8001/10000, Error: 0.06197519138577699
Epoch 9001/10000, Error: 0.055372184098791376


### 6. Evaluate the Final Output
#### Run a forward pass to get the final output after training

In [8]:
# One more forward pass with the trained parameters (no weight updates):
# inputs -> hidden activations -> network output
hidden_layer_output = sigmoid(X.dot(weights_hidden) + bias_hidden)
predicted_output = sigmoid(hidden_layer_output.dot(weights_output) + bias_output)

# Show the outputs rounded to three decimals for readability
print("Final predicted output:")
print(predicted_output.round(3))

Final predicted output:
[[0.053]
 [0.952]
 [0.952]
 [0.052]]


### 7. Display the Learned Parameters

In [9]:
# Pair each heading with the parameter array it introduces, in the order the
# signal flows through the network (hidden layer first, then output layer)
learned_parameters = [
    ("\nHidden layer weights:\n", weights_hidden),
    ("\nHidden layer bias:\n", bias_hidden),
    ("\nOutput layer weights:\n", weights_output),
    ("\nOutput layer bias:\n", bias_output),
]

print("\nLearned weights and biases:")
for heading, values in learned_parameters:
    print(heading, values)


Learned weights and biases:

Hidden layer weights:
 [[3.79198478 5.81661184]
 [3.80004873 5.8545897 ]]

Hidden layer bias:
 [[-5.82020057 -2.46277158]]

Output layer weights:
 [[-8.32186051]
 [ 7.66063503]]

Output layer bias:
 [[-3.45550373]]


### Implementing MLP using scikit-learn

In [10]:
from sklearn.neural_network import MLPClassifier
import numpy as np


# Define a neural network with 1 hidden layer of 2 neurons
# NOTE(review): the output recorded for this cell shows predictions
# [0 0 1 1] with probabilities close to 0.5, i.e. this configuration did not
# learn XOR ([0 1 1 0]). Consider revisiting the hyperparameters or the seed.
model = MLPClassifier(hidden_layer_sizes=(2,),
                      activation='logistic',   # sigmoid
                      solver='sgd',            # stochastic gradient descent
                      learning_rate_init=0.5,
                      max_iter=10000,
                      random_state=42)

# Train the model.
# The XOR targets are stored as a (4, 1) column vector for the NumPy network
# above; scikit-learn expects a 1-D label array and otherwise emits a
# DataConversionWarning, so flatten the labels before fitting.
model.fit(X, np.ravel(y))

# Predict the class labels and the per-class probabilities on the training inputs
predictions = model.predict(X)
probs = model.predict_proba(X)

print("Predictions:", predictions)
print("Probabilities:\n", np.round(probs, 3))

Predictions: [0 0 1 1]
Probabilities:
 [[0.506 0.494]
 [0.515 0.485]
 [0.489 0.511]
 [0.498 0.502]]


  y = column_or_1d(y, warn=True)


Display the learned weights and biases

In [11]:
# Display the weights and biases of the fitted scikit-learn model.
# `model.coefs_` and `model.intercepts_` are iterated layer by layer; index 0
# is printed as "Layer 0". Values are rounded to 3 decimals for display only.
print("Weights (coefs_):")
for i, coef in enumerate(model.coefs_):
    print(f" Layer {i} weights:\n{np.round(coef, 3)}")

print("\nBiases (intercepts_):")
for i, intercept in enumerate(model.intercepts_):
    print(f" Layer {i} biases:\n{np.round(intercept, 3)}")


Weights (coefs_):
 Layer 0 weights:
[[0.005 0.585]
 [0.283 0.136]]
 Layer 1 weights:
[[-0.717]
 [ 0.48 ]]

Biases (intercepts_):
 Layer 0 biases:
[-0.451 -0.462]
 Layer 1 biases:
[0.069]


### Classifying Iris Dataset using MLP

In [12]:
from sklearn.datasets import load_iris
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report

# Load the Iris data and reduce it to a two-class problem:
# keep Setosa (0) and Versicolor (1), drop class 2
iris = load_iris()
is_binary_class = iris.target != 2
X = iris.data[is_binary_class]
y = iris.target[is_binary_class]

# Hold out 30% of the samples for testing
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Feature scaling (important for MLP): the scaler is fitted on the training
# split only and then applied to both splits
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Define and train an MLP with 1 hidden layer of 5 neurons
mlp = MLPClassifier(
    hidden_layer_sizes=(5,),
    activation='relu',
    solver='adam',
    max_iter=1000,
    random_state=42,
).fit(X_train, y_train)

# Predict on the held-out split and evaluate
y_pred = mlp.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))


Accuracy: 1.0

Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        17
           1       1.00      1.00      1.00        13

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30

