In [1]:
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
from scipy import sparse

In [2]:
def softmax(V):
    """Apply softmax along axis 0 (each column becomes a probability vector).

    The per-column maximum is subtracted before exponentiating so that
    np.exp never receives a large positive argument; this does not change
    the result because softmax is invariant to a constant shift.

    Parameters
    ----------
    V : array_like
        Scores; normalisation runs over axis 0.

    Returns
    -------
    numpy.ndarray
        Array of the same shape as V whose entries along axis 0 sum to 1.
    """
    col_max = np.max(V, axis=0, keepdims=True)
    exps = np.exp(V - col_max)
    return exps / exps.sum(axis=0)

In [4]:
# cost or loss function
def cost(Y, Yhat):
    """Mean cross-entropy between targets Y and predicted probabilities Yhat.

    Parameters
    ----------
    Y : array_like, 2-D
        Target weights (one-hot in this notebook); the sum is divided by
        Y.shape[1].
    Yhat : array_like, broadcastable with Y
        Predicted probabilities.

    Returns
    -------
    float
        -sum(Y * log(Yhat)) / Y.shape[1], where entries with Y == 0
        contribute exactly 0.

    Notes
    -----
    The logarithm is evaluated only where Y is non-zero. Without this, a
    probability that underflows to exactly 0 at a position where Y is 0
    produces 0 * -inf = nan and turns the whole loss into NaN. A zero
    probability at a position where Y is non-zero still yields an infinite
    loss, which is the correct value.
    """
    Y = np.asarray(Y)
    Yhat = np.asarray(Yhat)
    active = Y != 0
    # Replace ignored probabilities by 1 so that log() returns 0 there.
    safe_prob = np.where(active, Yhat, 1.0)
    return -np.sum(Y * np.log(safe_prob)) / Y.shape[1]

In [5]:

def convert_labels(y, C):
    """One-hot encode integer labels as a dense (C, len(y)) array.

    Column j of the result has a single 1 in row y[j] and 0 elsewhere.

    Parameters
    ----------
    y : array_like of int
        Class index of each sample; used as row indices.
    C : int
        Number of rows (classes) in the output.

    Returns
    -------
    numpy.ndarray
        Dense array of shape (C, len(y)).
    """
    n_samples = len(y)
    rows = y
    cols = np.arange(n_samples)
    ones = np.ones_like(y)
    one_hot = sparse.coo_matrix((ones, (rows, cols)), shape=(C, n_samples))
    return one_hot.toarray()

# Load Iris. X keeps the transposed layout used by the rest of this cell:
# axis 0 is read as the feature dimension and axis 1 as the sample index.
iris = datasets.load_iris()
X, y = iris.data.T, iris.target

# train_test_split is given the un-transposed array, so both feature splits
# are transposed back to the X layout afterwards.
X_train, X_test, y_train, y_test = train_test_split(
    X.T, y, test_size=0.3, random_state=42
)
X_train, X_test = X_train.T, X_test.T

# Layer sizes.
d0 = X_train.shape[0]  # input dimension (axis 0 of X_train)
d1 = h = 100           # hidden-layer width
d2 = C = 3             # number of output classes

# Seed the global NumPy RNG so the random initial weights, and therefore the
# printed losses and accuracies, are reproducible across runs. The data
# split is already fixed by random_state=42.
np.random.seed(42)

# Small random weights and zero biases for both layers.
W1 = 0.01 * np.random.randn(d0, d1)
b1 = np.zeros((d1, 1))
W2 = 0.01 * np.random.randn(d1, d2)
b2 = np.zeros((d2, 1))

Y = convert_labels(y_train, C)  # one-hot targets, shape (C, N)
N = X_train.shape[1]            # number of training samples
eta = 0.01                      # learning rate for gradient descent

for i in range(10000):
    # Forward pass: affine -> ReLU -> affine -> softmax.
    Z1 = W1.T @ X_train + b1
    A1 = np.maximum(Z1, 0)
    Z2 = W2.T @ A1 + b2
    Yhat = softmax(Z2)

    # Print the loss once every 1000 iterations.
    if not i % 1000:
        loss = cost(Y, Yhat)
        print("iter %d, loss: %f" % (i, loss))

    # Backward pass. E2 is the error at the output layer, scaled by 1/N.
    E2 = (Yhat - Y) / N
    dW2 = A1 @ E2.T
    db2 = E2.sum(axis=1, keepdims=True)
    E1 = W2 @ E2
    E1[Z1 <= 0] = 0  # ReLU gradient: no signal where the unit was inactive
    dW1 = X_train @ E1.T
    db1 = E1.sum(axis=1, keepdims=True)

    # Gradient-descent step, applied in place.
    W1 -= eta * dW1
    b1 -= eta * db1
    W2 -= eta * dW2
    b2 -= eta * db2

def _forward_scores(X_in):
    """Return (pre-activation, ReLU activation, output scores) for X_in.

    Uses the current module-level W1, b1, W2 and b2. The softmax is skipped
    because the callers below only take the argmax of the scores.
    """
    hidden_pre = W1.T @ X_in + b1
    hidden = np.maximum(hidden_pre, 0)
    scores = W2.T @ hidden + b2
    return hidden_pre, hidden, scores


# Accuracy on the training split.
Z1, A1, Z2 = _forward_scores(X_train)
predicted_class = Z2.argmax(axis=0)
acc = 100 * np.mean(predicted_class == y_train)
print('Training accuracy: %.2f %%' % acc)

# Accuracy on the held-out test split.
Z1_test, A1_test, Z2_test = _forward_scores(X_test)
predicted_class_test = Z2_test.argmax(axis=0)
test_acc = 100 * np.mean(predicted_class_test == y_test)
print('Test accuracy: %.2f %%' % test_acc)


iter 0, loss: 1.098858
iter 1000, loss: 0.301441
iter 2000, loss: 0.135906
iter 3000, loss: 0.101863
iter 4000, loss: 0.089456
iter 5000, loss: 0.083202
iter 6000, loss: 0.079436
iter 7000, loss: 0.076900
iter 8000, loss: 0.075056
iter 9000, loss: 0.073636
Training accuracy: 98.10 %
Test accuracy: 100.00 %
