In [1]:
import numpy as np

In [2]:
# A 0-D array (scalar): np.array is given a bare number rather than a list.
x = np.array(12)
x

array(12)

In [3]:
x.ndim

0

In [4]:
x.shape

()

In [5]:
x.size

1

In [6]:
# A 1-D array (vector) built from a flat list of three numbers.
y = np.array([1, 2, 3])
y

array([1, 2, 3])

In [7]:
y.ndim

1

In [8]:
y.shape

(3,)

In [9]:
y.size

3

In [10]:
# A 2-D array (matrix): a list of 3 rows, each holding 4 values.
z = np.array([[1, 2, 3, 4],
              [5, 6, 7 ,8],
              [9, 10, 11, 12]])
z

array([[ 1,  2,  3,  4],
       [ 5,  6,  7,  8],
       [ 9, 10, 11, 12]])

In [11]:
z.ndim

2

In [12]:
z.shape

(3, 4)

In [13]:
z.size

12

In [14]:
# A 3-D array: 4 stacked matrices, each with 2 rows of 3 values.
u = np.array([[[1, 2, 3],
               [4, 5, 6]],
               [[7, 8, 9],
                [10, 11, 12]],
                [[13, 14, 15],
                 [16, 17, 18]],
                 [[19, 20, 21],
                  [22, 23, 24]]])
u

array([[[ 1,  2,  3],
        [ 4,  5,  6]],

       [[ 7,  8,  9],
        [10, 11, 12]],

       [[13, 14, 15],
        [16, 17, 18]],

       [[19, 20, 21],
        [22, 23, 24]]])

In [15]:
u.ndim

3

In [16]:
u.shape

(4, 2, 3)

In [17]:
u.size

24

In [18]:
# The two integer indices each drop an axis; only the 0:3 slice keeps one,
# so the selection is 1-D.
u[0,0,0:3].shape

(3,)

In [19]:
# Plain Python list slicing: start at index 2 and stop before the last
# element (a negative stop counts from the end and is excluded).
a = [0, 1, 2, 3, 4, 5]
a[2:-1]

[2, 3, 4]

In [20]:
import tensorflow as tf

class NaiveDense:
    """A minimal fully connected layer computing ``activation(inputs @ W + b)``.

    ``W`` is a ``tf.Variable`` of shape ``(input_size, output_size)`` whose
    initial values are drawn uniformly between 0 and 0.1. ``b`` is a
    ``tf.Variable`` of shape ``(output_size,)`` initialised to zeros.
    """

    def __init__(self, input_size, output_size, activation) -> None:
        self.activation = activation

        # Kernel matrix: small random starting values.
        kernel_init = tf.random.uniform(
            (input_size, output_size), minval=0, maxval=0.1
        )
        self.W = tf.Variable(kernel_init)

        # Bias vector: starts at zero.
        bias_init = tf.zeros((output_size,))
        self.b = tf.Variable(bias_init)

    def __call__(self, inputs):
        """Apply the affine transform followed by the activation to ``inputs``."""
        affine = tf.matmul(inputs, self.W) + self.b
        return self.activation(affine)

    @property
    def weights(self):
        """The layer's variables as the list ``[W, b]``."""
        kernel, bias = self.W, self.b
        return [kernel, bias]

2023-08-15 11:08:57.684358: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-08-15 11:08:57.706874: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-08-15 11:08:57.707307: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [21]:
from typing import Any


class NaiveSequential:
    """Chain callables so that each layer's output feeds the next layer."""

    def __init__(self, layers) -> None:
        self.layers = layers

    def __call__(self, inputs) -> Any:
        """Pass ``inputs`` through every layer in order and return the result."""
        activations = inputs
        for apply_layer in self.layers:
            activations = apply_layer(activations)
        return activations

    @property
    def weights(self):
        """A new flat list of every layer's ``weights``, in layer order."""
        return [w for layer in self.layers for w in layer.weights]

In [22]:
# Two-layer classifier: 28 * 28 inputs -> 512 ReLU units -> 10 softmax outputs.
model = NaiveSequential([
    NaiveDense(input_size=28 * 28, output_size=512, activation=tf.nn.relu),
    NaiveDense(input_size=512, output_size=10, activation=tf.nn.softmax)
])
# Each NaiveDense exposes [W, b], so two layers yield four variables.
assert len(model.weights) == 4

2023-08-15 11:08:58.686077: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:995] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2023-08-15 11:08:58.686377: W tensorflow/core/common_runtime/gpu/gpu_device.cc:1960] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...


In [23]:
import math

class BatchGenerator:
    """Hand out consecutive ``(images, labels)`` slices of ``batch_size`` items.

    ``num_batches`` is the number of ``next()`` calls needed to cover the
    data once; the last batch may be shorter than ``batch_size``.
    """

    def __init__(self, images, labels, batch_size=128) -> None:
        assert len(images) == len(labels)
        self.images, self.labels = images, labels
        self.batch_size = batch_size
        self.index = 0  # start offset of the next batch
        self.num_batches = math.ceil(len(images) / batch_size)

    def next(self):
        """Return the next ``(images, labels)`` batch and advance the cursor."""
        start = self.index
        stop = start + self.batch_size
        batch = (self.images[start:stop], self.labels[start:stop])
        self.index = stop
        return batch

In [24]:
def one_training_step(model, images_batch, labels_batch):
    """Run one forward/backward pass on a batch and update the model's weights.

    The forward pass and loss are recorded on a ``tf.GradientTape``. The
    gradients of the mean loss with respect to ``model.weights`` are then
    handed to ``update_weights``. Returns the mean loss for the batch.
    """
    with tf.GradientTape() as tape:
        batch_predictions = model(images_batch)
        average_loss = tf.reduce_mean(
            tf.keras.losses.sparse_categorical_crossentropy(
                labels_batch, batch_predictions
            )
        )
    # Differentiate outside the recording context, then apply the update.
    trainable = model.weights
    update_weights(tape.gradient(average_loss, trainable), trainable)
    return average_loss

def update_weights(gradients, weights, learning_rate=0.001):
    """Apply one plain gradient-descent step in place: ``w <- w - learning_rate * g``.

    Args:
        gradients: One gradient per weight, in the same order as ``weights``.
        weights: Variables exposing ``assign_sub`` (e.g. ``tf.Variable``).
        learning_rate: Step size applied to every gradient.
    """
    for g, w in zip(gradients, weights):
        # assign_sub subtracts its argument from w, so pass only the step.
        # Passing ``w - g * learning_rate`` would leave w equal to
        # ``g * learning_rate`` instead of moving it down the gradient.
        w.assign_sub(g * learning_rate)

In [25]:
from keras import optimizers

optimizer = optimizers.SGD(learning_rate=0.001)

def update_weights(gradients, weights):
    """Let the module-level ``optimizer`` apply ``gradients`` to ``weights``.

    Gradients and weights are paired positionally before being handed over.
    """
    grads_and_vars = zip(gradients, weights)
    optimizer.apply_gradients(grads_and_vars)

In [26]:
def fit(model, images, labels, epoches, batch_size=128):
    """Train ``model`` with mini-batch steps for ``epoches`` passes over the data.

    Args:
        model: Model passed through to ``one_training_step``.
        images: Training inputs, sliced into batches by ``BatchGenerator``.
        labels: Targets; must have the same length as ``images``.
        epoches: Number of full passes over the data.
        batch_size: Number of samples per batch.

    Prints the epoch index at the start of each pass and the batch loss
    every 100 batches.
    """
    for epoch_counter in range(epoches):
        print(f'Epoch {epoch_counter}')
        # A fresh generator per epoch restarts batching from the first sample.
        # batch_size has to be forwarded explicitly; otherwise the generator
        # silently falls back to its own default and the argument is ignored.
        batch_generator = BatchGenerator(images, labels, batch_size=batch_size)
        for batch_counter in range(batch_generator.num_batches):
            images_batch, labels_batch = batch_generator.next()
            loss = one_training_step(model, images_batch, labels_batch)
            if batch_counter % 100 == 0:
                print(f'loss at batch {batch_counter}: {loss:.2f}')

In [27]:
from keras.datasets import mnist
# Load the MNIST train/test splits through keras.datasets.
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

# Flatten every image into a single row (the model's first layer takes
# 28 * 28 inputs) and convert to float32. Dividing by 255 presumably maps
# uint8 pixel values into [0, 1] — TODO confirm the raw dtype/range.
train_images = train_images.reshape((train_images.shape[0], -1))
train_images = train_images.astype('float32') / 255
test_images = test_images.reshape((test_images.shape[0], -1))
test_images = test_images.astype('float32') / 255

# Train for 10 passes over the training set.
fit(model, train_images, train_labels, epoches=10, batch_size=128)

Epoch 0
loss at batch 0: 3.57
loss at batch 100: 2.28
loss at batch 200: 2.26
loss at batch 300: 2.12
loss at batch 400: 2.23
Epoch 1
loss at batch 0: 1.92
loss at batch 100: 1.91
loss at batch 200: 1.88
loss at batch 300: 1.75
loss at batch 400: 1.84
Epoch 2
loss at batch 0: 1.60
loss at batch 100: 1.61
loss at batch 200: 1.56
loss at batch 300: 1.46
loss at batch 400: 1.52
Epoch 3
loss at batch 0: 1.34
loss at batch 100: 1.36
loss at batch 200: 1.29
loss at batch 300: 1.23
loss at batch 400: 1.29
Epoch 4
loss at batch 0: 1.13
loss at batch 100: 1.17
loss at batch 200: 1.08
loss at batch 300: 1.07
loss at batch 400: 1.12
Epoch 5
loss at batch 0: 0.98
loss at batch 100: 1.03
loss at batch 200: 0.93
loss at batch 300: 0.94
loss at batch 400: 1.00
Epoch 6
loss at batch 0: 0.87
loss at batch 100: 0.92
loss at batch 200: 0.82
loss at batch 300: 0.85
loss at batch 400: 0.91
Epoch 7
loss at batch 0: 0.79
loss at batch 100: 0.83
loss at batch 200: 0.74
loss at batch 300: 0.77
loss at batch 40

In [28]:
# Run the trained model on the whole test set in a single call.
predictions = model(test_images)
predictions = predictions.numpy()
# The predicted class is the index of the largest output in each row.
predicted_labels = np.argmax(predictions, axis=1)
# Boolean array of hits; its mean is the fraction of correct predictions.
matches = predicted_labels == test_labels
print(f'accuracy: {matches.mean():.2f}')

accuracy: 0.81
