In [1]:
from datasets.mnist import MNIST, normalize_mnist

from nn.model import Sequential

from nn.activations import relu, tanh, sigmoid, softmax, leaky_relu
from nn.layers import Dense
from nn.loss import MeanSquaredError, CategoricalCrossEntropy, CrossEntropyLoss, MeanAbsoluteError
from nn.optimizer import GradientDescent
from nn.preprocessing import categorical_encoding, transform_input_data, normalize
from nn.utils import dummy_callable

import numpy as np
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.preprocessing import Normalizer

# import warnings
# warnings.filterwarnings("error")

In [2]:
# Build the project's MNIST dataset wrapper (datasets.mnist). The constructor
# prints a status line about the required data files.
# NOTE(review): mode='full' presumably selects the complete dataset — confirm.
data = MNIST(mode='full')

All required files are exist!


In [3]:

# --- Split the dataset into features and labels ------------------------------
train_data, test_data = data.dataset
(X_train, y_train), (X_test, y_test) = train_data, test_data

print('Train:', X_train.shape, y_train.shape)
print('Test:', X_test.shape, y_test.shape)

# --- Scale the pixel values ---------------------------------------------------
# The value range is printed on both sides of normalize_mnist so its effect is
# visible. Note the asymmetry: X_train is rebound to its normalized version,
# while X_test stays raw and its normalized copy is stored in X_test_.
print('Before norm:', np.min(X_train), np.max(X_train))
X_train, X_test_ = normalize_mnist(X_train), normalize_mnist(X_test)
print('After norm:', np.min(X_train), np.max(X_train))

# --- Bring inputs and targets into the layout the network is fed with ---------
# transform_input_data adds a trailing axis (see the shapes printed below);
# the training labels are run through categorical_encoding first.
X_train_ = transform_input_data(X_train)
X_test_ = transform_input_data(X_test_)
y_train_ = transform_input_data(categorical_encoding(y_train))

print('Training shapes')
print('X_train:', X_train_.shape)
print('y_train:', y_train_.shape)


Train: (60000, 784) (60000,)
Test: (10000, 784) (10000,)
Before norm: 0 255
After norm: 0.01 1.0
Training shapes
X_train: (60000, 784, 1)
y_train: (60000, 10, 1)


In [15]:
# Seed NumPy's global RNG before the model is built so that the random weight
# initialisation is repeatable across "Restart Kernel -> Run All".
# NOTE(review): assumes the nn package draws its random numbers from NumPy's
# global RNG — confirm against nn.layers / nn.model.
SEED = 42
np.random.seed(SEED)

# Two Dense layers: 200 units with leaky_relu, then 10 units with softmax.
# nn.info() below prints the resulting weight/bias shapes.
# NOTE(review): 0.1 is presumably the learning rate of GradientDescent — confirm.
# NOTE(review): the variable `nn` shares its name with the `nn` package; later
# `from nn.x import y` statements still work, but the name is easy to misread.
nn = Sequential(input_shape=(1, 784),
                layers=[
                    Dense(200, activation=leaky_relu),
                    Dense(10, activation=softmax)
                ],
                optimizer=GradientDescent(0.1),
                loss=CrossEntropyLoss())

nn.info()

+-------------------------------------------------------------------+
|                         model: Sequential                         |
+------------+---------------+------------+------------+------------+
| Layer type | Weights shape | Bias shape | W strategy | Activation |
+------------+---------------+------------+------------+------------+
|   Dense    |   (200, 784)  |  (200, 1)  |   xavier   | leaky_relu |
|   Dense    |   (10, 200)   |  (10, 1)   |   xavier   |  softmax   |
+------------+---------------+------------+------------+------------+
|  Optimizer | GradientDescent                                      |
+------------+------------------------------------------------------+
|     Loss   | CrossEntropyLoss                                     |
+------------+------------------------------------------------------+


In [16]:
# Mean of every layer's weight matrix before training: a baseline for the
# identical printout made after nn.fit.
for wl in nn.weights.layers:
    print(wl.weights.mean())

0.0003375588526758321
-0.004639074070689696


In [17]:
# Forward pass of the not-yet-trained network on the first training sample.
# The displayed value is the output of the final (softmax) layer.
y_hat = nn.forward(X_train_[0])
y_hat

array([[3.44909607e-03],
       [2.70542842e-03],
       [7.33895147e-02],
       [3.72994761e-02],
       [8.70963285e-01],
       [1.17522053e-02],
       [1.53775834e-04],
       [2.48310030e-05],
       [5.47269713e-06],
       [2.56914499e-04]])

In [18]:
# Encoded target of the first training sample, for comparison with y_hat.
y_train_[0]

array([[0.01],
       [0.01],
       [0.01],
       [0.01],
       [0.01],
       [0.99],
       [0.01],
       [0.01],
       [0.01],
       [0.01]])

In [19]:
# Train on the prepared training set for 5 epochs; the loss is logged per epoch.
# NOTE(review): the logged loss goes up after the first epoch (about 0.52 -> 1.10
# at epoch 4). The 0.1 passed to GradientDescent may be too large for this
# setup — confirm before trusting the resulting model.
nn.fit(X_train_, y_train_, epochs=5)

<class 'numpy.ndarray'> and <class 'numpy.ndarray'> types as input data
Start training for 5 epochs


Epoch 1: 100%|██████████| 60000/60000 [01:10<00:00, 845.08samples/s]


CrossEntropyLoss: 0.5166233259834582


Epoch 2: 100%|██████████| 60000/60000 [01:11<00:00, 834.71samples/s]


CrossEntropyLoss: 0.5553806646836031


Epoch 3: 100%|██████████| 60000/60000 [01:11<00:00, 834.46samples/s]


CrossEntropyLoss: 0.5350611014861872


Epoch 4: 100%|██████████| 60000/60000 [01:12<00:00, 826.32samples/s]


CrossEntropyLoss: 1.1044705300627717


Epoch 5: 100%|██████████| 60000/60000 [01:11<00:00, 835.28samples/s]

CrossEntropyLoss: 0.9100256580342349





In [20]:
# Same per-layer weight means as before training, now taken after nn.fit.
for wl in nn.weights.layers:
    print(wl.weights.mean())

-0.129471601600262
-0.004639074070689606


In [21]:
# History stored under the 'mean_w1' key of nn.stat. Presumably this is the mean
# of the second layer's weights per update (the values match the second number
# printed by the weight-mean loop) — confirm in nn.model.
# NOTE(review): the bare expression prints the whole history; a slice such as
# nn.stat['mean_w1'][:10] keeps the output readable.
# NOTE(review): a near-constant mean does not by itself show the layer is frozen.
# With a softmax output and cross-entropy loss the per-class gradient terms
# cancel when each target sums to 1, leaving the mean unchanged (assuming the
# library uses the standard gradient). Check a different statistic, e.g. the
# standard deviation of the weights.
nn.stat['mean_w1']

[-0.004639074070689699,
 -0.004639074070689698,
 -0.0046390740706896945,
 -0.0046390740706897,
 -0.004639074070689695,
 -0.004639074070689698,
 -0.0046390740706896945,
 -0.0046390740706896945,
 -0.004639074070689696,
 -0.0046390740706896945,
 -0.0046390740706896945,
 -0.004639074070689693,
 -0.004639074070689693,
 -0.004639074070689698,
 -0.004639074070689693,
 -0.0046390740706896945,
 -0.004639074070689695,
 -0.004639074070689696,
 -0.004639074070689693,
 -0.004639074070689693,
 -0.004639074070689691,
 -0.004639074070689694,
 -0.004639074070689694,
 -0.004639074070689695,
 -0.004639074070689691,
 -0.004639074070689689,
 -0.004639074070689693,
 -0.0046390740706896945,
 -0.0046390740706896945,
 -0.004639074070689693,
 -0.004639074070689693,
 -0.004639074070689694,
 -0.00463907407068969,
 -0.004639074070689691,
 -0.004639074070689691,
 -0.004639074070689691,
 -0.004639074070689688,
 -0.0046390740706896885,
 -0.004639074070689688,
 -0.004639074070689684,
 -0.004639074070689686,
 -0.004639

In [22]:
from sklearn.metrics import accuracy_score

def calc_accuracy(y_true: np.ndarray, x_test, model=None) -> tuple:
    """Predict a class for every sample and score the predictions.

    Args:
        y_true: Ground-truth class labels, one per sample in ``x_test``.
        x_test: Iterable of samples; each one is passed to ``model.predict``
            individually.
        model: Object exposing ``predict(sample)``. When omitted, the
            notebook-level ``nn`` is used, so existing two-argument calls
            behave as before.

    Returns:
        A ``(acc, y_hats)`` tuple: the accuracy computed by
        ``sklearn.metrics.accuracy_score`` and the list of predicted class
        indices (the argmax of each prediction).
    """
    if model is None:
        # Looked up at call time, so a model rebuilt in a later cell is used.
        model = nn

    y_hats = [np.argmax(model.predict(x_i)) for x_i in x_test]
    return accuracy_score(y_true, y_hats), y_hats

In [23]:
# Accuracy on the test split; y1 keeps the predicted class indices.
acc, y1 = calc_accuracy(y_test, X_test_)
acc

0.8233

In [24]:
# Accuracy on the training split, to compare against the test accuracy.
# y2 keeps the predicted class indices; `acc` is rebound by this cell.
acc, y2 = calc_accuracy(y_train, X_train_)
acc

0.82545

In [48]:
# NOTE(review): this displays every training-set prediction. A summary such as
# y2[:20] or np.bincount(y2) shows the same thing without flooding the output.
y2

[0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,


In [51]:
# NOTE(review): this prints every row of the first training sample. Consider
# X_train_[0].shape or a short slice instead.
X_train_[0]

array([[0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.   

In [11]:
# Predicted class index for training sample 4 (argmax of the model's output).
y_ = nn.predict(X_train_[4])
np.argmax(y_)

7

In [12]:
# Encoded target of training sample 4, to compare with the predicted index.
y_train_[4]

array([[0.],
       [0.],
       [0.],
       [0.],
       [1.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.]])

In [1]:
from nn.activations import Relu, relu

In [6]:
# Evaluate the class-based activation's `df` at a positive input.
# NOTE(review): `df` is presumably the derivative — confirm in nn.activations.
a = Relu()
a.df(0.1)

1

In [5]:
# Functional form of the same check: relu with derivative=True at input 0.1,
# for comparison with the Relu().df(0.1) result.
relu(0.1, derivative=True)

1.0