In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_iris

In [2]:
# Load the iris dataset and build a DataFrame view of it: one column per
# feature plus a `species` column holding the class labels.
iris = load_iris()
data = pd.DataFrame(iris.data, columns=iris.feature_names).assign(
    species=iris.target
)
# Preview the first rows
data.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),species
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0


In [3]:
# Feature matrix and class labels taken from the loaded iris dataset
X, y = iris.data, iris.target

# Hold out 20% of the samples for testing; the fixed random_state keeps
# the split identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [4]:
# X has shape (num_samples, num_features)
num_samples, num_features = X.shape
# The network's input width is the number of features
input_size = num_features
# Number of distinct labels in y (iris has 3 species: 0, 1, 2)
num_classes = np.unique(y).size

In [5]:
# One-hot encode the labels: indexing the identity matrix with the integer
# labels selects, for each sample, the row with a single 1 at its class index.
y_train_oh, y_test_oh = (
    np.eye(num_classes)[labels] for labels in (y_train, y_test)
)

In [6]:
# Activation functions
def relu(z):
    """Rectified linear unit: element-wise maximum of 0 and ``z``."""
    rectified = np.maximum(0, z)
    return rectified

def relu_derivative(z):
    """Gradient of ReLU with respect to ``z``: 1.0 where ``z > 0``, else 0.0."""
    positive_mask = z > 0
    # Cast the boolean mask to floats so it can scale gradients directly
    return positive_mask.astype(float)
    
def softmax(z):
    """Row-wise softmax that is safe against overflow.

    Parameters
    ----------
    z : np.ndarray
        Array of raw scores (logits) with at least two dimensions; the
        normalisation runs along axis 1.

    Returns
    -------
    np.ndarray
        Array of the same shape as ``z`` whose entries along axis 1 are
        non-negative and sum to 1.
    """
    # Subtracting each row's maximum leaves the result mathematically
    # unchanged but keeps np.exp from overflowing to inf (inf / inf -> nan).
    shifted = z - np.max(z, axis=1, keepdims=True)
    exp_z = np.exp(shifted)
    return exp_z / np.sum(exp_z, axis=1, keepdims=True)

In [7]:
class NeuralNet:
    """Two-layer fully connected classifier (ReLU hidden layer, softmax output).

    Parameters are stored in ``self.params`` under the keys ``"W1"``,
    ``"b1"``, ``"W2"`` and ``"b2"``. Training is plain gradient descent:
    call ``forward`` and then ``backward`` on the same batch.

    Relies on the module-level helpers ``relu``, ``relu_derivative`` and
    ``softmax``.
    """

    def __init__(self, input_size, hidden_size, output_size, learning_rate=0.01, seed=None):
        """Create the weights and biases.

        Parameters
        ----------
        input_size : int
            Number of input features (rows of ``W1``).
        hidden_size : int
            Number of hidden units.
        output_size : int
            Number of output classes (columns of ``W2``).
        learning_rate : float
            Step size used by ``backward`` when updating the parameters.
        seed : int or None
            When given, the weights are drawn from a dedicated generator
            seeded with this value, so initialisation is reproducible.
            When None, NumPy's global random state is used.
        """
        if seed is None:
            # Same stream as np.random.randn, so a global np.random.seed(...)
            # issued by the caller still controls the initial weights.
            draw = np.random.standard_normal
        else:
            draw = np.random.default_rng(seed).standard_normal

        self.params = {}

        # Small random weights, zero biases
        self.params["W1"] = draw((input_size, hidden_size)) * 0.01
        self.params["b1"] = np.zeros((1, hidden_size))

        self.params["W2"] = draw((hidden_size, output_size)) * 0.01
        self.params["b2"] = np.zeros((1, output_size))

        self.learning_rate = learning_rate

        # Filled by forward(); backward() refuses to run while this is None
        self.cache = None

    # Forward pass
    def forward(self, X):
        """Compute class probabilities for ``X`` and cache the intermediates.

        Parameters
        ----------
        X : np.ndarray
            Input batch; its second dimension must match the rows of ``W1``.

        Returns
        -------
        np.ndarray
            Softmax output ``A2``, one row per sample.
        """
        W1 = self.params["W1"]
        b1 = self.params["b1"]
        W2 = self.params["W2"]
        b2 = self.params["b2"]

        Z1 = X @ W1 + b1
        A1 = relu(Z1)
        Z2 = A1 @ W2 + b2
        A2 = softmax(Z2)

        # Store intermediate values for backpropagation
        self.cache = (Z1, A1, Z2, A2)

        return A2

    # Loss function
    def compute_loss(self, y_pred, y_true):
        """Mean categorical cross-entropy between predictions and targets.

        Parameters
        ----------
        y_pred : np.ndarray
            Predicted probabilities, same shape as ``y_true``.
        y_true : np.ndarray
            One-hot encoded targets; the first dimension is the sample count.

        Returns
        -------
        float
            ``-sum(y_true * log(y_pred)) / m`` with ``m`` the number of samples.
        """
        # number of samples
        m = y_true.shape[0]

        # Keep probabilities away from exactly 0 so np.log never returns -inf,
        # which would make the loss inf (or nan where y_true is 0).
        safe_pred = np.clip(y_pred, 1e-12, 1.0)

        # Cross-entropy summed over classes and averaged over samples
        loss = -np.sum(y_true * np.log(safe_pred)) / m
        return loss

    # Backward pass
    def backward(self, X, y_true):
        """Backpropagate through the cached forward pass and update parameters.

        Must be called after ``forward`` on the same batch ``X``. Updates
        ``self.params`` in place with one gradient-descent step.

        Parameters
        ----------
        X : np.ndarray
            The input batch that was passed to ``forward``.
        y_true : np.ndarray
            One-hot encoded targets for that batch.

        Raises
        ------
        RuntimeError
            If ``forward`` has not been called yet.
        """
        if getattr(self, "cache", None) is None:
            raise RuntimeError("forward() must be called before backward()")

        W2 = self.params["W2"]

        Z1, A1, Z2, A2 = self.cache

        # number of samples
        m = y_true.shape[0]

        # Gradient of the mean cross-entropy w.r.t. the softmax input
        dZ2 = (A2 - y_true) / m

        dW2 = A1.T @ dZ2
        db2 = np.sum(dZ2, axis=0, keepdims=True)

        # Propagate through the hidden layer; ReLU passes gradient only where Z1 > 0
        dA1 = dZ2 @ W2.T
        dZ1 = dA1 * relu_derivative(Z1)

        dW1 = X.T @ dZ1
        db1 = np.sum(dZ1, axis=0, keepdims=True)

        # Gradient-descent step
        self.params["W1"] -= self.learning_rate * dW1
        self.params["b1"] -= self.learning_rate * db1
        self.params["W2"] -= self.learning_rate * dW2
        self.params["b2"] -= self.learning_rate * db2

    # Prediction
    def predict(self, X):
        """Return the index of the most probable class for each row of ``X``.

        Runs ``forward``, so the cached intermediates are overwritten.
        """
        probs = self.forward(X)
        return np.argmax(probs, axis=1)

In [8]:
# Hyperparameters
hidden_size = 10
learning_rate = 0.01
num_epochs = 1000
random_seed = 42

# NeuralNet draws its initial weights from NumPy's global random state;
# seeding it here makes the training run reproducible on a fresh kernel.
np.random.seed(random_seed)

nn = NeuralNet(
    input_size=input_size,
    hidden_size=hidden_size,
    output_size=num_classes,
    learning_rate=learning_rate
)

# Full-batch gradient descent over the training set
for epoch in range(num_epochs):

    # Forward pass (also caches the activations that backward() needs)
    y_pred_train = nn.forward(X_train)

    loss = nn.compute_loss(y_pred_train, y_train_oh)

    # One gradient-descent step; parameters are updated in place
    nn.backward(X_train, y_train_oh)

    # Report progress every 100 epochs
    if (epoch + 1) % 100 == 0:
        # Note: `loss` was computed before this epoch's update, while the
        # accuracy below comes from a fresh forward pass after the update.
        train_preds = nn.predict(X_train)
        train_acc = np.mean(train_preds == y_train)
        print(f"Epoch {epoch+1:4d} | Loss: {loss:.4f} | Train accuracy: {train_acc:.4f}")

Epoch  100 | Loss: 1.0968 | Train accuracy: 0.3417
Epoch  200 | Loss: 1.0854 | Train accuracy: 0.3417
Epoch  300 | Loss: 1.0308 | Train accuracy: 0.3250
Epoch  400 | Loss: 0.8955 | Train accuracy: 0.6583
Epoch  500 | Loss: 0.7279 | Train accuracy: 0.6583
Epoch  600 | Loss: 0.5756 | Train accuracy: 0.6917
Epoch  700 | Loss: 0.4866 | Train accuracy: 0.8750
Epoch  800 | Loss: 0.4291 | Train accuracy: 0.9250
Epoch  900 | Loss: 0.3834 | Train accuracy: 0.9500
Epoch 1000 | Loss: 0.3429 | Train accuracy: 0.9750
