<a href="https://colab.research.google.com/github/ryuhyunwoo1/classDeepLearning/blob/main/practices/Practices_week3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Week 3

## The gears of neural networks

### Element-wise operations 要素単位の操作

In [2]:
def naive_relu(x):
  assert len(x.shape) == 2
  x = x.copy()
  for i in range(x.shape[0]):
    for j in range(x.shape[1]):
      x[i, j] = max(x[i, j], 0)
  return x

In [1]:
import numpy as np
aa = np.array( [ [1,2,3,-1] , [1,-1,-1,0] ])

In [None]:
aa.shape[0]

In [None]:
aa.shape[1]

In [3]:
naive_relu(aa)

array([[1, 2, 3, 0],
       [1, 0, 0, 0]])

In [7]:
def naive_add(x, y):
  assert len(x.shape) == 2
  assert x.shape == y.shape
  x = x.copy()
  for i in range(x.shape[0]):
    for j in range(x.shape[1]):
      x[i, j] += y[i, j]
  return x

In [11]:
import time
x = np.random.random( (20, 100) )
y = np.random.random( (20, 100) )

現在時間 = time.time()
for _ in range(1000):
  z = x + y
  z = np.maximum(z, 0.)
print("かかった時間: {0:.2f} s".format(time.time() - 現在時間))

かかった時間: 0.01 s


In [12]:
現在時間 = time.time()
for _ in range(1000):
  z = naive_add(x, y)
  z = naive_relu(z)
print("かかった時間: {0:.2f} s".format(time.time() - 現在時間))

かかった時間: 1.66 s


### Broadcasting

In [13]:
import numpy as np
x = np.array([1,2,3,4,5])
y = np.array([ [1,1,1,1,1], [1,1,1,1,1] ])

In [16]:
x.shape

(5,)

In [17]:
y.shape

(2, 5)

In [18]:
x + y

array([[2, 3, 4, 5, 6],
       [2, 3, 4, 5, 6]])

In [20]:
import numpy as np
X = np.random.random( (32, 10) )
y = np.random.random( (10, ))

In [28]:
## yは1次元だったが、新しい次元を追加して2次元にする
y = np.expand_dims(y, axis=0)
y

array([[[[[[0.81542428, 0.2762695 , 0.07490954, 0.78861081,
            0.30423674, 0.65350644, 0.66387545, 0.86989927,
            0.84552531, 0.55445347]]]]]])

In [None]:
## yをaxis=0で32回拡張
Y = np.concatenate([y]*32, axis=0)

In [29]:
## 行列の演算のため行と列の数を合わせる。まるで合コン(笑)
def naive_add_matrix_add_vector(x, y):
  assert len(x.shape) == 2
  assert len(y.shape) == 1
  assert x.shape[1] == y.shape[0]
  x = x.copy()
  for i in range(x.shape[0]):
    for j in range(x.shape[1]):
      x[i, j] += y[j]
  return x

In [30]:
import numpy as np
x = np.random.random( (64, 3, 32, 10) )
y = np.random.random( (32, 10) )
z = np.maximum(x, y)
z.shape

(64, 3, 32, 10)

### Tensor product

In [31]:
x = np.random.random( (32, ) )
y = np.random.random( (32, ) )
z = np.dot(x, y)

In [33]:
## x, y 二つともshape==1のVector
def naive_vector_dot(x, y):
  assert len(x.shape) == 1
  assert len(y.shape) == 1
  assert x.shape[0] == y.shape[0]
  z = 0.
  for i in range(x.shape[0]):
    z += x[i] * y[i]
  return z

In [34]:
def naive_matrix_vector_dot(x, y):
  assert len(x.shape) == 2
  assert len(y.shape) == 1
  z = np.zeros(x.shape[0])
  for i in range(x.shape[0]):
    z[i] = naive_vector_dot(x[i, :], y)
  return z

In [36]:
def naive_matrix_dot(x, y):
    assert len(x.shape) == 2
    assert len(y.shape) == 2
    assert x.shape[1] == y.shape[0]
    z = np.zeros((x.shape[0], y.shape[1]))
    for i in range(x.shape[0]):
        for j in range(y.shape[1]):
            row_x = x[i, :]
            column_y = y[:, j]
            z[i, j] = naive_vector_dot(row_x, column_y)
    return z

### Tensor reshaping

In [None]:
from tensorflow.keras.datasets import mnist
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

In [38]:
train_images = train_images.reshape( (60000, 28 * 28) )

In [40]:
x = np.array([[0., 1.],
              [2., 3.],
              [4., 5.]])
x.shape

(3, 2)

In [42]:
x = x.reshape( (6, 1) )
x

array([[0.],
       [1.],
       [2.],
       [3.],
       [4.],
       [5.]])

In [43]:
x = np.zeros( (300, 20) )
x = np.transpose(x)
x.shape

(20, 300)

## The engine of neural networks: gradient-based optimization

### Backpropagation algorithm

In [44]:
import tensorflow as tf
x = tf.Variable(0.)
with tf.GradientTape() as tape:
  y = 2 * x + 3
grad_of_y_wrt_x = tape.gradient(y, x)

grad_of_y_wrt_x

<tf.Tensor: shape=(), dtype=float32, numpy=2.0>

In [49]:
x = tf.Variable(tf.random.uniform((2,2)))
with tf.GradientTape() as tape:
  y = 2 * x + 3
grad_of_y_wrt_x = tape.gradient(y, x)

grad_of_y_wrt_x

<tf.Tensor: shape=(2, 2), dtype=float32, numpy=
array([[2., 2.],
       [2., 2.]], dtype=float32)>

In [51]:
W = tf.Variable(tf.random.uniform((2,2)))
b = tf.Variable(tf.zeros((2,)))
x = tf.random.uniform((2,2))
with tf.GradientTape() as tape:
  y = tf.matmul(x, W) + b
grad_of_y_wrt_W_and_b = tape.gradient(y, [W,b])

grad_of_y_wrt_W_and_b

[<tf.Tensor: shape=(2, 2), dtype=float32, numpy=
 array([[1.2604747, 1.2604747],
        [0.5118102, 0.5118102]], dtype=float32)>,
 <tf.Tensor: shape=(2,), dtype=float32, numpy=array([2., 2.], dtype=float32)>]

## Example: week 2

### week 2

In [60]:
from tensorflow.keras.datasets import mnist
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()
from tensorflow import keras
from tensorflow.keras import layers

In [61]:
train_images = train_images.reshape((60000, 28 * 28))
train_images = train_images.astype("float32") / 255
test_images = test_images.reshape((10000, 28 * 28))
test_images = test_images.astype("float32") / 255

In [62]:
model = keras.Sequential([
    layers.Dense(512, activation="relu"),
    layers.Dense(10, activation="softmax")
])

In [64]:
model.compile(optimizer = "rmsprop",
              loss = "sparse_categorical_crossentropy",
              metrics=["accuracy"])

In [None]:
model.fit(train_images, train_labels, epochs=2, batch_size=128)

### 再実装

#### A simple Dense class

In [67]:
import tensorflow as ff

class NaiveDense:
  def __init__(self, input_size, output_size, activation):
    self.activation = activation

    w_shape = (input_size, output_size)
    w_initial_value = tf.random.uniform(w_shape, minval=0, maxval=1e-1)
    self.W = tf.Variable(w_initial_value)

    b_shape = (output_size, )
    b_initial_value = tf.zeros(b_shape)
    self.b = tf.Variable(b_initial_value)

  def __call__(self, inputs):
    return self.activation(tf.matmul(inputs, self.W) + self.b)

  @property
  def weights(self):
    return [self.W, self.b]

#### A simple Sequential class

In [68]:
class NaiveSequential:
  def __init__(self, layers):
    self.layers = layers

  def __call__(self, inputs):
    x = inputs
    for layer in self.layers:
      x = layer(x)
    return x

  @property
  def weights(self):
    weights = []
    for layer in self.layers:
      weights += layer.weights
    return weights

In [None]:
NaiveDense(input_size=28*28, output_size=512, activation=tf.nn.relu).weights

In [73]:
model = NaiveSequential([
    NaiveDense(input_size = 28*28, output_size = 512, activation=tf.nn.relu),
    NaiveDense(input_size = 512, output_size = 10, activation=tf.nn.softmax)
])
assert len(model.weights) == 4

#### A batch generator

In [74]:
import math

class BatchGenerator:
  def __init__(self, images, labels, batch_size=128):
    assert len(images) == len(labels)
    self.index = 0
    self.images = images
    self.labels = labels
    self.batch_size = batch_size
    self.num_batches = math.ceil(len(images) / batch_size)

  def next(self):
    images = self.images[self.index : self.index + self.batch_size]
    labels = self.labels[self.index : self.index + self.batch_size]
    self.index += self.batch_size
    return images, labels

#### Running one training step

In [76]:
def one_training_step(model, images_batch, labels_batch):
    with tf.GradientTape() as tape:
        predictions = model(images_batch)
        per_sample_losses = tf.keras.losses.sparse_categorical_crossentropy(
            labels_batch, predictions)
        average_loss = tf.reduce_mean(per_sample_losses)
    gradients = tape.gradient(average_loss, model.weights)
    update_weights(gradients, model.weights)
    return average_loss

In [77]:
learning_rate = 1e-3

def update_weights(gradients, weights):
    for g, w in zip(gradients, weights):
        w.assign_sub(g * learning_rate)

In [78]:
from tensorflow.keras import optimizers

optimizer = optimizers.SGD(learning_rate=1e-3)

def update_weights(gradients, weights):
    optimizer.apply_gradients(zip(gradients, weights))

#### Full training loop

In [79]:
def fit(model, images, labels, epochs, batch_size=128):
    for epoch_counter in range(epochs):
        print(f"Epoch {epoch_counter}")
        batch_generator = BatchGenerator(images, labels)
        for batch_counter in range(batch_generator.num_batches):
            images_batch, labels_batch = batch_generator.next()
            loss = one_training_step(model, images_batch, labels_batch)
            if batch_counter % 100 == 0:
                print(f"loss at batch {batch_counter}: {loss:.2f}")

In [80]:
from tensorflow.keras.datasets import mnist
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

train_images = train_images.reshape((60000, 28 * 28))
train_images = train_images.astype("float32") / 255
test_images = test_images.reshape((10000, 28 * 28))
test_images = test_images.astype("float32") / 255

fit(model, train_images, train_labels, epochs=5, batch_size=128)

Epoch 0
loss at batch 0: 7.52
loss at batch 100: 2.23
loss at batch 200: 2.22
loss at batch 300: 2.08
loss at batch 400: 2.22
Epoch 1
loss at batch 0: 1.88
loss at batch 100: 1.87
loss at batch 200: 1.84
loss at batch 300: 1.70
loss at batch 400: 1.83
Epoch 2
loss at batch 0: 1.57
loss at batch 100: 1.57
loss at batch 200: 1.51
loss at batch 300: 1.41
loss at batch 400: 1.51
Epoch 3
loss at batch 0: 1.31
loss at batch 100: 1.34
loss at batch 200: 1.24
loss at batch 300: 1.20
loss at batch 400: 1.28
Epoch 4
loss at batch 0: 1.12
loss at batch 100: 1.16
loss at batch 200: 1.05
loss at batch 300: 1.04
loss at batch 400: 1.12


Evaluating the model

In [None]:
model.weights

In [82]:
predictions = model(test_images)
predictions = predictions.numpy()
predicted_labels = np.argmax(predictions, axis=1)
matches = predicted_labels == test_labels
print(f"accuracy: {matches.mean():.2f}")

accuracy: 0.75


# Week3 - questions

Try to use naive version of relu and add functions.

In [84]:
def naive_relu(x):
  assert len(x.shape) == 2
  x = x.copy()
  for i in range(x.shape[0]):
    for j in range(x.shape[1]):
      x[i, j] = max(x[i, j], 0)
  return x

def naive_add(x, y):
  assert len(x.shape) == 2
  assert x.shape == y.shape
  x = x.copy()
  for i in range(x.shape[0]):
    for j in range(x.shape[1]):
      x[i, j] += y[i, j]
  return x

In [87]:
この世の中を生きていくのは = np.array( [ [1,5,9,-12] , [1,-3,-5,0] ])
孤独で難しいことではあるが = np.array( [ [1,5,9,-14] , [1,-3,-5,7] ])
naive_relu(この世の中を生きていくのは)
naive_add(この世の中を生きていくのは, 孤独で難しいことではあるが)

array([[  2,  10,  18, -26],
       [  2,  -6, -10,   7]])

Why we use x.copy()?

In [None]:
## Preventing Modification of Original Data
## Maintaining Data Integrity
## Avoiding Side Effects

Why the naive version of the code slower?

In [None]:
## 使用者定義関数を使ったから。オラクルデータベースを触っていたときからそんなこともあるの覚えた。

Try to use broadcasting with your own example.

In [96]:
x = np.array([1,2,3,4,5])
y = np.array([ [2,4,5,3,2], [2,9,11,31,48] ])

In [97]:
x = np.expand_dims(x, axis=0)
x = np.concatenate([x]*2, axis=0)
x

array([[1, 2, 3, 4, 5],
       [1, 2, 3, 4, 5]])

In [98]:
x + y

array([[ 3,  6,  8,  7,  7],
       [ 3, 11, 14, 35, 53]])

Try reshaping code and explain it to teammates.

In [101]:
x = np.array([[0., 1.],
              [2., 3.],
              [4., 5.]])
x = x.reshape( (6, 1) )
x.shape

(6, 1)

In [102]:
x = np.transpose(x)
x.shape

(1, 6)

Say x, y are scalar, find derivatives of f(x,y) = x^2*y + x*y + 3*y with respect to x and y. (Set x = 2, y = 1 for the calculation)

In [103]:
x = tf.Variable(2.)
y = tf.Variable(1.)

with tf.GradientTape() as tape:
  z = x*x*y + x*y + 3*y
grad_of_z_wrt_xy = tape.gradient(z,[x,y])

grad_of_z_wrt_xy

[<tf.Tensor: shape=(), dtype=float32, numpy=5.0>,
 <tf.Tensor: shape=(), dtype=float32, numpy=9.0>]

Set x, W and b as

x=tf.constant(np.array([1.,4.,3.]).reshape(1,3),dtype=tf.float32)

W=tf.Variable(tf.random.uniform((3,2)),dtype=tf.float32)

b=tf.Variable(tf.zeros((2,)),dtype=tf.float32

In [104]:
x=tf.constant(np.array([1.,4.,3.]).reshape(1,3),dtype=tf.float32)
W=tf.Variable(tf.random.uniform((3,2)),dtype=tf.float32)
b=tf.Variable(tf.zeros((2,)),dtype=tf.float32)

Find derivatives of f(W, b) = (xW + b) with respect to W and b.

In [106]:
with tf.GradientTape() as tape:
  z = tf.matmul(x, W) + b
快乐的深圳旅行 = tape.gradient(z,[W,b])

快乐的深圳旅行

[<tf.Tensor: shape=(3, 2), dtype=float32, numpy=
 array([[1., 1.],
        [4., 4.],
        [3., 3.]], dtype=float32)>,
 <tf.Tensor: shape=(2,), dtype=float32, numpy=array([1., 1.], dtype=float32)>]

Find derivatives of f(W, b) = (xW + b)^3 with respect to W and b.

In [108]:
with tf.GradientTape() as tape:
  z = tf.pow(tf.matmul(x, W) + b, 3)
无聊的海南旅行 = tape.gradient(z,[W,b])

无聊的海南旅行

[<tf.Tensor: shape=(3, 2), dtype=float32, numpy=
 array([[ 24.104362,  34.991413],
        [ 96.41745 , 139.96565 ],
        [ 72.31309 , 104.97424 ]], dtype=float32)>,
 <tf.Tensor: shape=(2,), dtype=float32, numpy=array([24.104362, 34.991413], dtype=float32)>]