In [1]:
import numpy as np
from sklearn.datasets import make_classification

In [2]:
X, y = make_classification(n_samples=1000)

In [3]:
def normalize(X):
    X = np.asarray(X)
    X_normalized = X / np.linalg.norm(X, axis=1, keepdims=True)
    return X_normalized

def train_test_split(X, y, test_size=0.25, shuffle=True):
    X, y = np.asarray(X), np.asarray(y)
    n = len(X)
    if shuffle:
        perm = np.random.permutation(n)
        X, y = X[perm], y[perm]
    test_rows = round(test_size * n)
    X_train, X_test, y_train, y_test = X[test_rows:], X[:test_rows], y[test_rows:], y[:test_rows]
    return X_train, X_test, y_train, y_test

In [4]:
X = normalize(X)

In [5]:
X_train, X_test, y_train, y_test = train_test_split(X, y)

In [16]:
class MLPClassifier:
    def __init__(self, hl=(100,), lr=0.1, verbose=True):
        self.hl_ = hl
        self.lr_ = lr
        self.verbose_ = verbose
    
    def __relu(self, Z):
        res = np.maximum(0, Z)
        return res
    
    def __d_relu(self, Z):
        Z[Z <= 0] = 0
        Z[Z > 0] = 1
        return Z
    
    def __sigmoid(self, Z):
        Z = 1 / (1 + np.exp(-Z))
        return Z
    
    def __predict(self):
        A = self.X_
        for i in range(len(self.w_)):
            Z = A.dot(self.w_[i][1:]) + self.w_[i][0]
            self.z_[i] = Z
            if i < len(self.w_) - 1:
                A = self.__relu(Z)
                self.a_[i] = A
            else:
                A = self.__sigmoid(Z)
        return A
    
    def __loss(self):
        preds = self.__predict()
        total_loss = -(self.y_ * np.log(1e-15 + preds)).sum()
        loss = 1 / self.n_ * total_loss
        return loss
    
    def __iterate(self):
        preds = self.__predict()
        gradients = np.array([])
        d_preds = -self.y_ / preds
        d_sigmoid = preds * (1 - preds)
        for i in range(len(self.w_) - 2, -1, -1):
            Z = self.z_[i]
            if i == len(self.w_) - 2:
                d_Z = d_preds * d_sigmoid
            else:
                d_A = d_Z.dot(self.w_[i][1:].T)
                d_Z = d_A * self.__d_relu(self.Z_[i])
            if i > 0:
                d_W = self.a_[-1].T.dot(d_Z)
            else:
                d_W = self.X_.T.dot(d_Z)
            d_b = d_Z.sum(0)
            print(d_W.shape, d_b.shape)
    
    def fit(self, X, y):
        self.X_ = X
        self.y_ = y
        self.n_ = len(y)
        self.n_classes_ = len(np.unique(self.y_))
        self.binary_ = self.n_classes_ == 2
        self.layers_ = (self.X_.shape[1],) + self.hl_ + (1 if self.binary_ else self.n_classes_,)
        self.w_ = np.array([np.random.randn(self.layers_[i] + 1, self.layers_[i + 1]) * np.sqrt(2 / self.layers_[i]) for i in range(len(self.layers_) - 1)])
        
        self.__iterate()
        

In [17]:
clf = MLPClassifier()

In [18]:
clf.fit(X_train, y_train)

ValueError: setting an array element with a sequence.

In [19]:
arr = np.empty(5)

In [24]:
np.append(np.array([]), np.array([[1, 2], [3, 4]]))

array([1., 2., 3., 4.])