In [168]:
import numpy as np
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder

In [50]:
x = load_digits()['data']
y = load_digits()['target']

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.33)

In [385]:
class DNN:
    
    def __init__(self, layers):
        self.w = self._init_w(layers)
        self.b = self._init_b(layers)
    
    # initialize weights
    def _init_w(self, layers):
        w = []
        for i in range(len(layers)-1):
            w.append(np.ones((layers[i], layers[i+1])))
        return w
    
    # initialize biases
    def _init_b(self, layers):
        b = []
        for i in range(len(layers)-1):
            b.append(np.ones((1, layers[i+1])))
        return b
    
    # activation
    def _activate(self, z):
        return 1 / (1 + np.exp(-z))
    
    # derivative of activation
    def _derivative(self, z):
        return self._activate(z) * (1-self._activate(z))
    
    # feed forward
    def _forward(self, x):
        z = [x]
        for i in range(len(self.w)):
            x = np.dot(x, self.w[i]) + self.b[i]
            z.append(x)
            x = self._activate(x)
        return x, z
    
    # backpropagation
    def _backprop(self, y, t, z):
        e = (y - t) * self._derivative(z[-1])
        delta_b = [e]
        delta_w = [e*z[-2].T]
        for i in range(2, len(self.w)+1):
            e = np.dot(e, self.w[-i].T) * self._derivative(z[-i])
            delta_b.append(e)
            delta_w.append(e*z[-(i+1)])
        return delta_w[::-1], delta_b[::-1]
    
    # evaluate the result
    def _evaluate(self, x, t):
        y, _ = self._forward(x)
        result = [(np.argmax(i), np.argmax(j)) for (i, j) in zip(y, t)]
        return sum(int(y == t) for (y, t) in result) / len(t)
    
    # train with mini batch
    def train(self, x, y, epochs):
        for i in range(epochs):
            for j in range(len(y)):
                x_, z_ = self._forward(x[j])
                delta_w, delta_b = self._backprop(x_, y[j], z_)
                print(self.w[0].shape, delta_w[0].shape, )
                self.w = [(i+j) for (i, j) in zip(self.w, delta_w)]
                self.b = [(i+j) for (i, j) in zip(self.b, delta_b)]
            print (self._evaluate(x, y))

In [386]:
d = DNN([64, 8, 10])

In [387]:
d.train(x_train, y_train, 10)

ValueError: shapes (1,10) and (8,64) not aligned: 10 (dim 1) != 8 (dim 0)

In [333]:
ta[:50]

[array([ 0.,  0., 11., 16., 16., 12.,  0.,  0.,  0.,  3., 16.,  7., 14.,
        16.,  1.,  0.,  0.,  0., 15., 14., 15., 16.,  6.,  0.,  0.,  0.,
         2., 10.,  9., 15.,  9.,  0.,  0.,  0.,  0.,  0.,  0.,  9., 10.,
         0.,  0.,  0.,  0.,  0.,  0., 12.,  8.,  0.,  0.,  0.,  6., 11.,
         6., 15.,  5.,  0.,  0.,  0.,  9., 16., 16., 12.,  0.,  0.]),
 array([[359., 359., 359., 359., 359., 359., 359., 359.]]),
 array([[9., 9., 9., 9., 9., 9., 9., 9., 9., 9.]])]

In [309]:
ta[-2].shape

(1, 8)

In [310]:
a1, a2 = d._backprop(ty[0], ytt[0], d.w, d.b, ta)

In [311]:
ty[0]

array([0.99987661, 0.99987661, 0.99987661, 0.99987661, 0.99987661,
       0.99987661, 0.99987661, 0.99987661, 0.99987661, 0.99987661])

In [312]:
np.mean(a1[0]).shape

()

In [313]:
for i in a1:
    print(i.shape)

(1, 64)
(1, 8)
(1, 10)


In [314]:
for i in a2:
    print(i.shape)

(1, 64)
(1, 8)
(1, 10)


In [305]:
a2[2]

0.00011102619044588751

In [180]:
encoder = OneHotEncoder()

In [190]:
ytt = encoder.fit_transform(y_train.reshape(-1, 1)).toarray()

In [213]:
d._derivative(ta[-1]).shape

(1203, 10)

In [133]:
for i in d.w:
    print(i.shape)

(64, 8)
(8, 10)


In [151]:
for i in b:
    print(i.shape)

(1203, 64)
(1203, 8)
(1203, 10)


In [316]:

np.argmax(a[0])

0

In [324]:
d._one_hot(a[0])

array([1., 0., 0., 0., 0., 0., 0., 0., 0., 0.])