In [None]:
import numpy as np
from tqdm import tqdm
# Notebook-wide configuration: reproducible random draws, and arrays printed
# with four digits of precision.
np.random.seed(0)
np.set_printoptions(precision=4)

In [None]:
# Build a K-armed spiral toy dataset: N points per class in D=2 dimensions.
np.random.seed(0)
N = 100  # number of points per class
D = 2    # dimensionality
K = 3    # number of classes
X = np.zeros((N * K, D))
y = np.zeros(N * K, dtype='uint8')
for j in range(K):
    rows = slice(N * j, N * (j + 1))   # rows of X / entries of y owned by class j
    radius = np.linspace(0.0, 1, N)
    # Each class sweeps its own angular range, jittered with Gaussian noise.
    theta = np.linspace(j * 4, (j + 1) * 4, N) + np.random.randn(N) * 0.2
    X[rows] = np.c_[radius * np.sin(theta), radius * np.cos(theta)]
    y[rows] = j

# The layers work on column-wise samples: features x samples, classes x samples.
x_train = X.T
y_train = np.eye(len(set(y)))[y].T

In [None]:
class Layer:
    """Abstract base for network layers.

    Subclasses override ``forward`` and ``backward``; both raise
    ``NotImplementedError`` here.
    """

    def __init__(self) -> None:
        """Nothing to initialise on the base class."""

    def forward(self, input_data):
        """Compute the layer output for ``input_data`` (must be overridden)."""
        raise NotImplementedError()

    def backward(self, grad, alpha):
        """Propagate ``grad`` backwards with step size ``alpha`` (must be overridden)."""
        raise NotImplementedError()
        
class ZLayer(Layer):
    """Affine (fully connected) layer: ``output = W @ input + B``.

    ``W`` has shape ``(outSize, inSize)`` and ``B`` has shape ``(outSize, 1)``,
    so inputs are laid out with one sample per column.
    """

    def __init__(self, inSize, outSize, seed=0):
        """Create the layer with small random weights and zero biases.

        Args:
            inSize: Number of input features (columns of ``W``).
            outSize: Number of output units (rows of ``W`` and ``B``).
            seed: Seed of the private generator used to draw ``W``. The default
                of 0 produces the same weights that seeding the global NumPy
                generator with 0 would; pass ``None`` for a non-deterministic
                initialisation.
        """
        # A private RandomState keeps layer construction from resetting the
        # global np.random stream as a hidden side effect.
        rng = np.random.RandomState(seed)
        self.W = 0.01 * rng.randn(outSize, inSize)
        self.B = np.zeros((outSize, 1))

    def forward(self, inData):
        """Store ``inData`` and return ``W @ inData + B``."""
        self.input = inData
        self.output = np.dot(self.W, self.input) + self.B
        return self.output

    def backward(self, outGrad, alpha=1e-0):
        """Apply one gradient step and return the gradient w.r.t. the input.

        Args:
            outGrad: Gradient of the loss w.r.t. this layer's output.
            alpha: Step size used to update ``W`` and ``B`` in place.

        Returns:
            Gradient of the loss w.r.t. the input stored by ``forward``.
        """
        # Must be computed with the pre-update weights.
        inGrad = np.dot(self.W.T, outGrad)
        # Gradients are summed over samples (columns), not averaged; any 1/m
        # scaling has to be present in outGrad already.
        dW = np.dot(outGrad, self.input.T)
        dB = np.sum(outGrad, axis=1, keepdims=True)
        # Kept on the instance so callers can inspect the last gradients.
        self.dW = dW
        self.dB = dB
        self.W -= alpha * dW
        self.B -= alpha * dB
        return inGrad
    
class ALayer(Layer):
    """Element-wise activation layer built from a function and its derivative."""

    def __init__(self, g, gPrime):
        """Store the activation ``g`` and its derivative ``gPrime``."""
        self.g = g
        self.gPrime = gPrime

    def forward(self, inData):
        """Store ``inData`` and return ``g(inData)``."""
        self.input = inData
        self.output = self.g(self.input)
        return self.output

    def backward(self, outGrad, alpha=None):
        """Return the gradient w.r.t. the input: ``gPrime(input) * outGrad``.

        ``alpha`` is accepted only so the signature matches
        ``Layer.backward(grad, alpha)``; this layer has no parameters to
        update, so the value is ignored.
        """
        return self.gPrime(self.input) * outGrad


class SLayer(ALayer):
    """Activation layer whose derivative is a per-sample Jacobian (e.g. softmax)."""

    def backward(self, outGrad, alpha=None):
        """Contract the per-sample Jacobians with ``outGrad``.

        ``gPrime`` is evaluated on the stored *output* and its result is
        indexed ``[sample, i, k]``; ``outGrad`` is indexed ``[k, sample]``.
        ``alpha`` is accepted for signature compatibility and ignored.
        """
        return np.einsum('jik,kj->ij', self.gPrime(self.output), outGrad)
    
class LLayer(Layer):
    """Loss layer wrapping a loss function and its derivative.

    Both callables are invoked as ``fn(pred, obs, eps)``.
    """

    def __init__(self, g, gPrime, eps=0):
        """Store the loss ``g``, its derivative ``gPrime`` and the ``eps`` passed to both."""
        self.g, self.gPrime, self.eps = g, gPrime, eps

    def forward(self, pred, obs):
        """Record ``pred`` as the input, then compute, store and return the loss."""
        self.input = pred
        loss = self.g(pred, obs, self.eps)
        self.output = loss
        return loss

    def backward(self, pred, obs):
        """Return ``gPrime(pred, obs, eps)``; nothing is stored on the instance."""
        gradient = self.gPrime(pred, obs, self.eps)
        return gradient

In [None]:
def tanh(x):
    """Hyperbolic-tangent activation, applied element-wise via NumPy."""
    activated = np.tanh(x)
    return activated

def tanh_prime(x):
    """Derivative of tanh: ``1 - tanh(x)**2``, element-wise."""
    t = np.tanh(x)
    return 1 - t**2

def mse(y_obs, y_pred):
    """Mean of the squared differences between ``y_obs`` and ``y_pred``."""
    residual = y_obs - y_pred
    return np.mean(np.power(residual, 2))

def mse_prime(y_obs, y_pred):
    """Gradient of ``mse`` w.r.t. ``y_pred``: ``2 * (y_pred - y_obs) / y_obs.size``."""
    residual = y_pred - y_obs
    return 2 * residual / y_obs.size

def softmax(x):
    """Column-wise softmax of ``x``.

    Normalisation runs along axis 0, so each column of the result sums to 1.
    The per-column maximum is subtracted before exponentiating: this leaves
    the result mathematically unchanged but keeps ``np.exp`` from overflowing
    to ``inf`` (and the ratio from becoming ``nan``) for large inputs.
    """
    shifted = x - np.max(x, axis=0, keepdims=True)
    exps = np.exp(shifted)
    return exps / np.sum(exps, axis=0, keepdims=True)

def softmax_prime(x):
    """Per-sample softmax Jacobians built from softmax probabilities.

    ``x`` is an (n, m) array of probabilities: n classes, m samples. The
    result has shape (m, n, n) with
    ``result[j, i, k] = x[i, j] * (i == k) - x[i, j] * x[k, j]``.
    """
    n_classes = x.shape[0]
    per_sample = x.T                       # (m, n): one row of probabilities per sample
    column = per_sample[:, :, None]        # (m, n, 1)
    diagonal = column * np.eye(n_classes)  # probabilities placed on each sample's diagonal
    outer = column * per_sample[:, None, :]  # outer product of each sample with itself
    return diagonal - outer

def centropy(x, y, eps=1e-8):
    """Per-sample cross-entropy between predictions ``x`` and one-hot ``y``.

    ``x`` is clipped from below at ``eps`` before the logarithm. Entries where
    ``y == 1`` contribute ``-log(x)``, all others contribute 0, and the
    contributions are summed along axis 0.
    """
    clipped = x.clip(min=eps, max=None)
    contributions = np.where(y == 1, -np.log(clipped), 0)
    return contributions.sum(axis=0)

def centropy_prime(x, y, eps=0):
    """Gradient of the cross-entropy w.r.t. the predictions ``x``.

    ``x`` is clipped from below at ``eps`` (0 by default). The result is
    ``-1 / x`` where ``y == 1`` and 0 everywhere else.
    """
    clipped = x.clip(min=eps, max=None)
    return np.where(y == 1, -1 / clipped, 0)


In [None]:
# Manual single-step check of the pipeline: affine layer -> softmax -> cross-entropy.
Z1 = ZLayer(2, 3)
Z1.forward(x_train)

A1 = SLayer(softmax, softmax_prime)
A1.forward(Z1.output)

# LLayer is built with its default eps=0, which it passes on to both loss callables.
L1 = LLayer(centropy, centropy_prime)
L1.forward(A1.output, y_train)

# Backward pass: loss gradient w.r.t. the probabilities, then through the softmax
# (divided by grad_ce.shape[1]), then through the affine layer.
grad_ce = L1.backward(A1.output, y_train)
grad_sm = A1.backward(grad_ce)/grad_ce.shape[1]
# Z1.backward returns the gradient w.r.t. the layer input (despite the name grad_dW) and
# updates Z1.W / Z1.B in place with its default alpha, so W and B printed below are the
# post-update values.
grad_dW = Z1.backward(grad_sm)

# Only the first five columns are shown, transposed so each printed row is one column.
print('probs.T \n', A1.output[:,0:5].T, A1.output.shape, '\n')
print('y_train.T \n', y_train[:,0:5].T, y_train.shape, '\n')

# 'loss' is the sum of the stored loss values divided by the length of their first axis.
print('CE \n', L1.output[0:5], L1.output.shape, '\n')
print('loss \n', f"{np.sum(L1.output)/L1.output.shape[0] : .6f} \n")

print('grad_ce.T \n', grad_ce[:,0:5].T, grad_ce.shape, '\n')
print('grad_sm.T \n', grad_sm[:,0:5].T, grad_sm.shape, '\n')

# Gradients stored by Z1.backward, followed by the updated parameters.
print('dW.T \n', Z1.dW.T, Z1.dW.shape, '\n')
print('dB.T \n', Z1.dB.T, Z1.dB.shape, '\n')
print('W.T \n', Z1.W.T, Z1.W.shape, '\n')
print('B.T \n', Z1.B.T, Z1.B.shape, '\n')

In [12]:
class Network:
    """Full-batch gradient-descent trainer over an ordered stack of layers.

    The loss callables are invoked as ``loss(output, y_train)`` and
    ``loss_prime(output, y_train)``.
    """

    def __init__(self):
        """Start with no layers and no loss configured."""
        self.layers = []
        self.loss = None
        self.loss_prime = None

    def add(self, layer):
        """Append ``layer`` to the end of the forward pipeline."""
        self.layers.append(layer)

    def use(self, loss, loss_prime):
        """Set the loss function and its derivative used by ``fit``."""
        self.loss = loss
        self.loss_prime = loss_prime

    @staticmethod
    def _takes_learning_rate(layer):
        """Return True if ``layer.backward`` accepts a second positional argument."""
        import inspect  # local import: only this helper needs it

        positional = 0
        for param in inspect.signature(layer.backward).parameters.values():
            if param.kind is param.VAR_POSITIONAL:
                return True
            if param.kind in (param.POSITIONAL_ONLY, param.POSITIONAL_OR_KEYWORD):
                positional += 1
        return positional >= 2

    def fit(self, x_train, y_train, epochs, learning_rate, verbose=False):
        """Run ``epochs`` full-batch training steps and print the error after each.

        Args:
            x_train: Input passed to the first layer's ``forward``.
            y_train: Targets passed to the loss callables.
            epochs: Number of forward/backward passes.
            learning_rate: Step size forwarded to every layer whose ``backward``
                accepts one; layers whose ``backward`` takes only the gradient
                are called without it.
            verbose: Accepted for interface compatibility; the error is
                printed every epoch regardless of its value.
        """
        # Inspect each layer once, in the order the backward pass visits them.
        backward_steps = [
            (layer, self._takes_learning_rate(layer))
            for layer in reversed(self.layers)
        ]

        for i in range(epochs):
            output = x_train
            for layer in self.layers:
                output = layer.forward(output)

            # Reported error: summed loss divided by the length of its first axis.
            err = self.loss(output, y_train)
            err = np.sum(err) / err.shape[0]
            # The loss gradient is scaled by the size of its second axis before
            # back-propagation.
            grad = self.loss_prime(output, y_train)
            grad = grad / grad.shape[1]
            for layer, wants_rate in backward_steps:
                if wants_rate:
                    grad = layer.backward(grad, learning_rate)
                else:
                    grad = layer.backward(grad)

            print('epoch %d/%d   error=%f' % (i+1, epochs, err))


# Softmax classifier: one affine layer (2 inputs -> 3 outputs) followed by a softmax layer.
nn = Network()
nn.add(ZLayer(2, 3))
nn.add(SLayer(softmax, softmax_prime))

# Cross-entropy loss and its derivative.
nn.use(centropy, centropy_prime)

# Positional arguments: epochs=2, learning_rate=1.
nn.fit(x_train, y_train, 2, 1)

#for l in nn.layers:
#    print(l.input)

epoch 1/2   error=1.097088
epoch 2/2   error=1.068715


In [None]:
class Network:
    """Full-batch trainer over an ordered stack of layers.

    The loss callables are invoked as ``loss(y_train, output)`` and
    ``loss_prime(y_train, output)``, and every layer's ``backward`` is called
    as ``backward(grad, learning_rate)``.
    """

    def __init__(self):
        """Start with no layers and no loss configured."""
        self.layers = []
        self.loss = None
        self.loss_prime = None

    def add(self, layer):
        """Append ``layer`` to the end of the forward pipeline."""
        self.layers.append(layer)

    def use(self, loss, loss_prime):
        """Set the loss function and its derivative used by ``fit``."""
        self.loss = loss
        self.loss_prime = loss_prime

    def fit(self, x_train, y_train, epochs, learning_rate, verbose=False):
        """Run ``epochs`` full-batch training steps with a tqdm progress bar.

        Args:
            x_train: Input passed to the first layer's ``forward``.
            y_train: Targets passed to the loss callables.
            epochs: Number of forward/backward passes.
            learning_rate: Step size handed to every layer's ``backward``.
            verbose: Accepted for interface compatibility; not consulted.
        """
        for i in tqdm(range(epochs)):
            output = x_train
            for layer in self.layers:
                output = layer.forward(output)

            err = self.loss(y_train, output)
            grad = self.loss_prime(y_train, output)
            for layer in reversed(self.layers):
                grad = layer.backward(grad, learning_rate)

            # Report whenever i is a multiple of epochs/10.
            if i % (epochs/10) == 0:
                # Epochs are reported 1-based: iteration i is epoch i+1.
                print('epoch %d/%d   error=%f' % (i+1, epochs, err))

    
# Two affine layers (2 -> 3 -> 1), each followed by a tanh activation.
# NOTE(review): the last ZLayer(3, 1) produces a single output row; if y_train is still the
# 3-row one-hot matrix built in the spiral-data cell, targets and outputs do not line up —
# confirm which y_train this cell is meant to run against.
nn = Network()
nn.add(ZLayer(2, 3))
nn.add(ALayer(tanh, tanh_prime))
nn.add(ZLayer(3, 1))
nn.add(ALayer(tanh, tanh_prime))

# Mean-squared-error loss and its derivative.
nn.use(mse, mse_prime)

# Positional arguments: epochs=10000, learning_rate=.1.
nn.fit(x_train, y_train, 10000, .1)

#for l in nn.layers:
#    print(l.input)

In [None]:
# Small worked example: softmax, its Jacobian, cross-entropy and its gradient on a 2x3
# integer input (2 rows, 3 columns; softmax normalises along axis 0).
np.random.seed(0)
x = np.random.randint(5, size=(2,3))
s = softmax(x)
print('s :\n\n', s, '\n')

# softmax_prime is fed the probabilities s, not the raw input x.
sp = softmax_prime(s)
print('s_prime :\n\n', sp, '\n')

# One-hot targets, one column per sample. This rebinds the name y used for the label
# vector in the spiral-data cell.
y = np.array([[1, 1, 0], [0, 0, 1]])
print(y)

c = centropy(s, y)
print('c :\n\n', c, '\n')

cp = centropy_prime(s, y)
print('c_prime :\n\n', cp, '\n')

# Same contraction that SLayer.backward applies; as the last expression of the cell its
# value is what the cell displays.
np.einsum('jik,kj->ij', sp, cp)

In [None]:
# Manual forward pass on the four XOR input/target pairs, stored one sample per column.
# NOTE(review): FCLayer and ActivationLayer are not defined in the visible part of this
# notebook — confirm they are still available (or were renamed) before relying on this cell.
np.set_printoptions(precision=4)
np.random.seed(0)

# Rebinds x_train / y_train: after the transpose they are 2x4 and 1x4.
x_train = np.array([[0,0], [0,1], [1,0], [1,1]]).T
y_train = np.array([[0], [1], [1], [0]]).T

# Rebinds L1, which an earlier cell used for the loss layer.
L1 = FCLayer(2, 3)
L1.forward(x_train)
#print(L1.W)
#print(L1.B)
print(L1.input)
print(L1.output)

A1 = ActivationLayer(tanh, tanh_prime)
A1.forward(L1.output)
print(A1.output)

L2 = FCLayer(3, 1)
L2.forward(A1.output)

A2 = ActivationLayer(tanh, tanh_prime)
A2.forward(L2.output)
print(A2.output)



In [None]:
# Train a 2-3-1 tanh network on XOR, passing one sample at a time.
# NOTE(review): FCLayer and ActivationLayer are not defined in the visible part of this
# notebook, and the visible Network classes define no predict() (Network_ does) — confirm
# which definitions this cell is meant to run against.
import numpy as np
np.set_printoptions(precision=4)
#%matplotlib inline

# Module-style imports, left disabled.
#from network import Network
#from fc_layer import FCLayer
#from activation_layer import ActivationLayer
#from activations import tanh, tanh_prime
#from losses import mse, mse_prime

# training data: sample dimension first, each sample a 1x2 input / 1x1 target
x_train = np.array([[[0,0]], [[0,1]], [[1,0]], [[1,1]]])
y_train = np.array([[[0]], [[1]], [[1]], [[0]]])

# network
net = Network()
net.add(FCLayer(2, 3))
net.add(ActivationLayer(tanh, tanh_prime))
net.add(FCLayer(3, 1))
net.add(ActivationLayer(tanh, tanh_prime))

# train
net.use(mse, mse_prime)
net.fit(x_train, y_train, epochs=1000, learning_rate=0.1)

# test: predictions on the training inputs themselves
out = net.predict(x_train)
print(out)

In [None]:
class Network_:
    """Per-sample (stochastic) trainer over an ordered stack of layers.

    Samples are indexed along the first axis of the data. The loss callables
    are invoked as ``loss(target, output)`` and ``loss_prime(target, output)``.
    """

    def __init__(self):
        """Start with no layers and no loss configured."""
        self.layers = []
        self.loss = None
        self.loss_prime = None

    def add(self, layer):
        """Append ``layer`` to the end of the forward pipeline."""
        self.layers.append(layer)

    def use(self, loss, loss_prime):
        """Set the loss function and its derivative used by ``fit``."""
        self.loss = loss
        self.loss_prime = loss_prime

    def _forward_pass(self, sample):
        """Feed one sample through every layer in order and return the result."""
        signal = sample
        for layer in self.layers:
            signal = layer.forward(signal)
        return signal

    def predict(self, input_data):
        """Return a list holding the network output for each sample of ``input_data``."""
        return [self._forward_pass(input_data[k]) for k in range(len(input_data))]

    def fit(self, x_train, y_train, epochs, learning_rate, verbose=False):
        """Train for ``epochs`` passes, updating the layers after every sample.

        Args:
            x_train: Training inputs, sample dimension first.
            y_train: Training targets, indexed like ``x_train``.
            epochs: Number of passes over the samples.
            learning_rate: Step size handed to every layer's ``backward``.
            verbose: When true, print the average error after each epoch.
        """
        n_samples = len(x_train)

        for epoch in tqdm(range(epochs)):
            running_error = 0
            for k in range(n_samples):
                prediction = self._forward_pass(x_train[k])

                # Accumulated only for the progress report.
                running_error += self.loss(y_train[k], prediction)

                # Back-propagate from the last layer to the first.
                signal = self.loss_prime(y_train[k], prediction)
                for layer in self.layers[::-1]:
                    signal = layer.backward(signal, learning_rate)

            # Average error over the samples of this epoch.
            running_error /= n_samples
            if verbose:
                print('epoch %d/%d   error=%f' % (epoch+1, epochs, running_error))