In [1]:
from datasets.mnist import MNIST, normalize_mnist

from nn.model import Sequential

from nn.activations import relu, tanh, sigmoid, softmax, leaky_relu, s_softmax
from nn.layers import Dense
from nn.loss import MeanSquaredError, CrossEntropyLoss, MeanAbsoluteError
from nn.optimizer import GradientDescent
from nn.preprocessing import categorical_encoding, transform_input_data, normalize
from nn.utils import dummy_callable

import numpy as np
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.preprocessing import Normalizer

# import warnings
# warnings.filterwarnings("error")

In [2]:
# Build the MNIST dataset object; the train/test splits are read from `data.dataset`.
# NOTE(review): mode='full' presumably selects the complete dataset — confirm in datasets.mnist.
data = MNIST(mode='full')

All required files are exist!


In [3]:

# Split the dataset into its train / test (features, labels) pairs.
train_data, test_data = data.dataset
(X_train, y_train), (X_test, y_test) = train_data, test_data

print('Train:', X_train.shape, y_train.shape)
print('Test:', X_test.shape, y_test.shape)

# Report the value range of the training features before and after rescaling.
print('Before norm:', np.min(X_train), np.max(X_train))
X_train = normalize_mnist(X_train)
print('After norm:', np.min(X_train), np.max(X_train))

# Model inputs: the test split is normalised here (train was done above),
# then both splits go through the same input transform.
X_test_ = transform_input_data(normalize_mnist(X_test))
X_train_ = transform_input_data(X_train)

# Training targets: categorical encoding followed by the input transform.
y_train_ = transform_input_data(categorical_encoding(y_train))

print('Training shapes')
print('X_train:', X_train_.shape)
print('y_train:', y_train_.shape)


Train: (60000, 784) (60000,)
Test: (10000, 784) (10000,)
Before norm: 0 255
After norm: 0.01 1.0
Training shapes
X_train: (60000, 784, 1)
y_train: (60000, 10, 1)


In [4]:
# Assemble the classifier: two 32-unit leaky-ReLU Dense layers followed by a
# 10-unit s_softmax output, optimised with GradientDescent(0.05) on cross-entropy.
nn = Sequential(
    input_shape=(1, 784),
    layers=[
        Dense(32, activation=leaky_relu),
        Dense(32, activation=leaky_relu),
        Dense(10, activation=s_softmax),
    ],
    optimizer=GradientDescent(0.05),
    loss=CrossEntropyLoss(),
)

# Print the architecture summary table.
nn.info()

+--------------------------------------------------------------------+
|                         model: Sequential                          |
+------------+---------------+------------+------------+-------------+
| Layer type | Weights shape | Bias shape | W strategy |  Activation |
+------------+---------------+------------+------------+-------------+
|   Dense    |   (32, 784)   |  (32, 1)   |   xavier   |  leaky_relu |
|   Dense    |   (32, 32)    |  (32, 1)   |   xavier   |  leaky_relu |
|   Dense    |   (10, 32)    |  (10, 1)   |   xavier   |   s_softmax |
+------------+---------------+------------+------------+-------------+
|  Optimizer | GradientDescent                                       |
+------------+-------------------------------------------------------+
|     Loss   | CrossEntropyLoss                                      |
+------------+-------------------------------------------------------+


In [5]:
# Display the `wl2` weight-layer entry (its repr lists shapes, init strategy and activation).
nn.weights.wl2

(10, 32) | (10, 1) | xavier | s_softmax

In [28]:
# Print the mean weight value of every layer in the model.
for wl in nn.weights.layers:
    print(wl.weights.mean())

-0.0003008822092974713
-0.008375903424341968
0.0338111181628268


In [29]:
# Run one forward pass on the first training sample and display the output vector.
y_hat = nn.forward(X_train_[0])
y_hat

array([[1.35939616e-02],
       [4.81149940e-01],
       [2.04440871e-02],
       [1.20386284e-01],
       [2.71261112e-01],
       [3.61467789e-02],
       [3.01013631e-02],
       [1.28724181e-03],
       [2.54708186e-02],
       [1.58413490e-04]])

In [30]:
# Apply s_softmax to the first output entry only.
# NOTE(review): a softmax over a single element is 1 by construction — looks like a debugging probe.
s_softmax(y_hat[0])

array([1.])

In [31]:
# Check the shape of the s_softmax derivative for a single-element input.
s_softmax.df(y_hat[0]).shape

(1, 1)

In [32]:
# Train the network on the prepared training inputs and encoded targets for 5 epochs.
nn.fit(X_train_, y_train_, epochs=5)

<class 'numpy.ndarray'> and <class 'numpy.ndarray'> types as input data
Start training for 5 epochs


Epoch 1: 100%|██████████| 60000/60000 [00:48<00:00, 1241.01samples/s]


CrossEntropyLoss: 0.5122774223861558


Epoch 2: 100%|██████████| 60000/60000 [00:48<00:00, 1233.35samples/s]


CrossEntropyLoss: 0.5147282688582581


Epoch 3: 100%|██████████| 60000/60000 [00:48<00:00, 1243.59samples/s]


CrossEntropyLoss: 0.5130067843385026


Epoch 4: 100%|██████████| 60000/60000 [00:48<00:00, 1238.79samples/s]


CrossEntropyLoss: 0.5118783019903779


Epoch 5: 100%|██████████| 60000/60000 [00:48<00:00, 1232.42samples/s]

CrossEntropyLoss: 0.5097161562001253





In [33]:
# Print the mean weight value of every layer again, to compare with the earlier printout.
for wl in nn.weights.layers:
    print(wl.weights.mean())
    

-0.05205016208829907
-0.30543604228312127
0.03381111816282724


In [34]:
# Display the values stored under the 'mean_w0' key of the model's `stat` mapping.
# NOTE(review): presumably the first layer's mean weight logged during training — confirm in nn.model.
# NOTE(review): this prints the whole series; a slice would keep the output short.
nn.stat['mean_w0']

[-0.00015577411478098622,
 -0.00044043438972253855,
 -0.0004740437538010656,
 -0.00042380226853017106,
 -0.0007943692982357716,
 -0.0009254346122077827,
 -0.0007969358814729988,
 -0.0012797994301696769,
 -0.0011728517384187779,
 -0.001408327564528138,
 -0.001500627774770544,
 -0.001287444657781499,
 -0.0013850009978708655,
 -0.0012666607917473575,
 -0.0012350305660703486,
 -0.0011487774910031718,
 -0.0011928921734155626,
 -0.001379813334036577,
 -0.0012949730427958062,
 -0.0013168589272490865,
 -0.0012510774789464899,
 -0.0013364113455468759,
 -0.0011551939737692641,
 -0.0010896059765720429,
 -0.0009854751514984132,
 -0.0009129573122899424,
 -0.0012645758495005485,
 -0.0016240628494811782,
 -0.0017323599738323112,
 -0.0016759235607682905,
 -0.0016724149685277275,
 -0.0016071475113099902,
 -0.0016411056654470017,
 -0.0016721389276484894,
 -0.0019091672463330902,
 -0.001785483829723193,
 -0.001867064426517427,
 -0.0018492501689463604,
 -0.0019045882705740474,
 -0.0018912313115276003,
 -0

In [35]:
from sklearn.metrics import accuracy_score

def calc_accuracy(y_true: np.ndarray, x_test, model=None) -> tuple:
    """Score a model's predictions against the true labels.

    Each sample in ``x_test`` is passed to ``predict`` one at a time and the
    index of the largest output is taken as the predicted class.

    Args:
        y_true: True class labels, one per sample in ``x_test``.
        x_test: Iterable of model inputs, already preprocessed for the model.
        model: Object exposing ``predict(x)``. When omitted, the notebook-level
            ``nn`` model is used; it is looked up at call time.

    Returns:
        An ``(accuracy, predictions)`` tuple: the value returned by
        ``accuracy_score`` and the list of predicted class indices.
    """
    # Fall back to the global model so existing two-argument calls keep working.
    predictor = nn if model is None else model
    y_hats = [np.argmax(predictor.predict(x_i)) for x_i in x_test]
    return accuracy_score(y_true, y_hats), y_hats

In [36]:
# Accuracy and predicted labels on the test split; display the accuracy.
acc, y1 = calc_accuracy(y_test, X_test_)
acc

0.9351

In [37]:
# Accuracy and predicted labels on the training split; note that `acc` is overwritten here.
acc, y2 = calc_accuracy(y_train, X_train_)
acc

0.9403166666666667

In [48]:
# Display the predicted labels held in `y2`.
# NOTE(review): this dumps the entire list; a slice such as y2[:20] would keep the output readable.
y2

[0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,


In [51]:
# Display the first prepared training sample.
# NOTE(review): this prints the full column vector; consider showing only its shape or a slice.
X_train_[0]

array([[0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.   

In [11]:
# Predict on training sample 4 and display the index of the largest output.
y_ = nn.predict(X_train_[4])
np.argmax(y_)

7

In [12]:
# Display the encoded target vector for training sample 4.
y_train_[4]

array([[0.],
       [0.],
       [0.],
       [0.],
       [1.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.]])

In [1]:
from nn.activations import Relu, relu

In [6]:
# Evaluate the derivative at 0.1 through the class-based Relu activation.
a = Relu()
a.df(0.1)

1

In [5]:
# Evaluate the derivative at 0.1 through the function-style relu (derivative=True).
relu(0.1, derivative=True)

1.0