In [15]:
import tensorflow as tf
import numpy as np
from tensorflow import keras

In [16]:
a_np = np.array([2, 3], dtype=np.float64)

In [17]:
a_np.dtype

dtype('float64')

In [18]:
a_tensor = tf.constant(a_np)

In [19]:
a_tensor

<tf.Tensor: shape=(2,), dtype=float64, numpy=array([2., 3.])>

In [20]:
a_tensor + a_tensor

<tf.Tensor: shape=(2,), dtype=float64, numpy=array([4., 6.])>

In [21]:
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

housing = fetch_california_housing()
x_train_full, x_test, y_train_full, y_test = train_test_split(
    housing.data, housing.target.reshape(-1, 1), random_state=42)
x_train, x_val, y_train, y_val = train_test_split(
    x_train_full, y_train_full, random_state=42)

In [22]:
scaler = StandardScaler()
x_train_scaled = scaler.fit_transform(x_train)
x_val_scaled = scaler.transform(x_val)
x_test_scaled = scaler.transform(x_test)

In [4]:
def huber_fn(y_true, y_pred):
    """Element-wise Huber loss with a fixed threshold of 1.

    Residuals whose magnitude is below 1 are penalised quadratically
    (``error**2 / 2``); all other residuals are penalised linearly
    (``|error| - 0.5``). No reduction is applied here.
    """
    residual = y_true - y_pred
    abs_residual = tf.math.abs(residual)
    quadratic_branch = tf.math.square(residual) / 2
    linear_branch = abs_residual - 0.5
    # Select per element: quadratic for small residuals, linear otherwise.
    return tf.where(abs_residual < 1, quadratic_branch, linear_branch)

In [5]:
x_train_scaled.shape

(11610, 8)

In [4]:
tf.random.set_seed(42)

In [7]:
input_shape = x_train_scaled.shape[1:]

model = keras.Sequential()
model.add(keras.layers.Dense(units=30, activation='selu', 
                             kernel_initializer='lecun_normal', input_shape=input_shape))
model.add(keras.layers.Dense(units=1))
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 30)                270       
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 31        
Total params: 301
Trainable params: 301
Non-trainable params: 0
_________________________________________________________________


In [15]:
keras.backend.clear_session()

In [16]:
model.compile(optimizer='nadam', loss=huber_fn, metrics=['mae'])

In [17]:
model.fit(x=x_train_scaled, y=y_train, epochs=2, 
          validation_data=(x_val_scaled, y_val))

Epoch 1/2
Epoch 2/2


<tensorflow.python.keras.callbacks.History at 0x16cfe1dffc8>

In [23]:
model_path = 'model/my_model_with_a_custom_loss.h5'
model.save(model_path)

In [25]:
model_load = keras.models.load_model(model_path, 
                                     custom_objects={'huber_fn': huber_fn})

In [26]:
model_load.fit(x_train_scaled, y_train, epochs=2, 
               validation_data=(x_val_scaled, y_val))

Epoch 1/2
Epoch 2/2


<tensorflow.python.keras.callbacks.History at 0x189262b7408>

In [29]:
def create_huber(threshold=1.0):
    """Return a Huber loss function parameterised by `threshold`.

    The returned callable is deliberately named ``huber_fn``: that is the
    key used in ``custom_objects`` when a model compiled with it is reloaded.
    """
    def huber_fn(y_true, y_pred):
        residual = y_true - y_pred
        abs_residual = tf.math.abs(residual)
        quadratic_branch = tf.math.square(residual) / 2
        linear_branch = threshold * abs_residual - threshold**2 / 2
        # Quadratic below the threshold, linear at or above it.
        return tf.where(abs_residual < threshold, quadratic_branch, linear_branch)
    return huber_fn

In [28]:
model.compile(optimizer='nadam', loss=create_huber(threshold=2.0), 
             metrics=['mae'])

In [30]:
model.fit(x_train_scaled, y_train, epochs=2, 
         validation_data=(x_val_scaled, y_val))

Epoch 1/2
Epoch 2/2


<tensorflow.python.keras.callbacks.History at 0x189273bec88>

In [31]:
model_path = 'model/my_model_with_a_custom_loss_threshold_2.h5'
model.save(model_path)

In [32]:
model_load = keras.models.load_model(model_path, 
                                    custom_objects={'huber_fn': create_huber(2.0)})

In [33]:
model_load.fit(x_train_scaled, y_train, epochs=2,
              validation_data=(x_val_scaled, y_val))

Epoch 1/2
Epoch 2/2


<tensorflow.python.keras.callbacks.History at 0x18927409c08>

In [6]:
class HuberLoss(keras.losses.Loss):
    """Huber loss with a configurable threshold.

    The threshold is included in `get_config()` so it is part of the
    configuration returned for this loss.
    """
    def __init__(self, threshold=1.0, **kwargs):
        super().__init__(**kwargs)
        self.threshold = threshold
    def call(self, y_true, y_pred):
        """Return the per-element Huber loss of `y_pred` against `y_true`."""
        delta = self.threshold
        residual = y_true - y_pred
        abs_residual = tf.abs(residual)
        quadratic_branch = tf.square(residual) / 2
        linear_branch = delta * abs_residual - delta**2 / 2
        return tf.where(abs_residual < delta, quadratic_branch, linear_branch)
    def get_config(self):
        """Extend the base configuration with this loss's threshold."""
        config = dict(super().get_config())
        config['threshold'] = self.threshold
        return config

In [9]:
model = keras.models.Sequential([
    keras.layers.Dense(30, activation="selu", kernel_initializer="lecun_normal",
                       input_shape=input_shape),
    keras.layers.Dense(1),
])

In [11]:
model.compile(loss=HuberLoss(2.), optimizer='nadam', metrics=['mae'])

Epoch 1/2
Epoch 2/2


<tensorflow.python.keras.callbacks.History at 0x19afa07d608>

In [13]:
model.fit(x_train_scaled, y_train, epochs=2,
         validation_data=(x_val_scaled, y_val))

Epoch 1/2
Epoch 2/2


<tensorflow.python.keras.callbacks.History at 0x19afb2cb388>

In [4]:
model_path = "model/my_model_with_a_custom_loss_class.h5"
# model.save(model_path)

In [7]:
model = keras.models.load_model(model_path, 
                                custom_objects={'HuberLoss': HuberLoss})

In [8]:
model.loss.threshold

2.0

In [10]:
def my_glorot_initializer(shape_, dtype=tf.float32):
    """Glorot-style normal initializer.

    Draws values from a normal distribution whose standard deviation is
    ``sqrt(2 / (shape_[0] + shape_[1]))``.

    Args:
        shape_: Shape of the tensor to create; the first two entries are
            used as the two fan sizes.
        dtype: Data type of the generated tensor.

    Returns:
        A tensor of shape `shape_` filled with random normal values.
    """
    # The body previously referenced `shape`, which is not the parameter name.
    stddev = tf.sqrt(2. / (shape_[0] + shape_[1]))
    return tf.random.normal(shape_, stddev=stddev, dtype=dtype)

In [11]:
layer = keras.layers.Dense(1, activation='relu', 
                           kernel_initializer=my_glorot_initializer)

In [12]:
class MyL1Regularizer(keras.regularizers.Regularizer):
    """Custom L1 penalty: the sum of ``|factor * w|`` over all weights."""
    def __init__(self, factor, **kwargs):
        super().__init__(**kwargs)
        self.factor = factor
    def __call__(self, weights):
        """Return the scalar regularisation penalty for `weights`."""
        scaled = self.factor * weights
        return tf.reduce_sum(tf.abs(scaled))
    def get_config(self):
        """Return the constructor arguments needed to rebuild this object."""
        return dict(factor=self.factor)

In [28]:
regularizer = keras.regularizers.Regularizer()
regularizer.get_config()

NotImplementedError: <tensorflow.python.keras.regularizers.Regularizer object at 0x000002A0FFBF2748> does not implement get_config()

In [27]:
loss = keras.losses.Loss()
loss.get_config()

{'reduction': 'auto', 'name': None}

In [13]:
keras.backend.clear_session()
np.random.seed(42)
tf.random.set_seed(42)

In [17]:
input_shape = x_train_full.shape[1:]
model = keras.models.Sequential([
    keras.layers.Dense(30, activation="selu", kernel_initializer="lecun_normal",
                       input_shape=input_shape),
    keras.layers.Dense(1, 
                       kernel_regularizer=MyL1Regularizer(0.01)),
])

In [18]:
model.compile(loss="mse", optimizer="nadam", metrics=["mae"])

In [24]:
model.fit(x_train_scaled, y_train, epochs=2,
          validation_data=(x_val_scaled, y_val))

Epoch 1/2
Epoch 2/2


<tensorflow.python.keras.callbacks.History at 0x2a0f7e6eb48>

In [30]:
keras.backend.clear_session()
np.random.seed(42)
tf.random.set_seed(42)

In [31]:
model = keras.models.Sequential([
    keras.layers.Dense(30, activation="selu", kernel_initializer="lecun_normal",
                       input_shape=input_shape),
    keras.layers.Dense(1),
])

In [32]:
model.compile(loss=create_huber(2.0), optimizer='nadam', 
              metrics=[create_huber(2.0)])

In [33]:
model.fit(x_train_scaled, y_train, epochs=2)

Epoch 1/2
Epoch 2/2


<tensorflow.python.keras.callbacks.History at 0x2a0ffc3f988>

In [34]:
precision = keras.metrics.Precision()
precision([0, 1, 1, 1, 0, 1, 0, 1], [1, 1, 0, 1, 0, 1, 0, 1])

<tf.Tensor: shape=(), dtype=float32, numpy=0.8>

In [35]:
precision([0, 1, 0, 0, 1, 0, 1, 1], [1, 0, 1, 1, 0, 0, 0, 0])

<tf.Tensor: shape=(), dtype=float32, numpy=0.5>

In [40]:
precision.result()

<tf.Tensor: shape=(), dtype=float32, numpy=0.0>

In [41]:
precision.variables

[<tf.Variable 'true_positives:0' shape=(1,) dtype=float32, numpy=array([0.], dtype=float32)>,
 <tf.Variable 'false_positives:0' shape=(1,) dtype=float32, numpy=array([0.], dtype=float32)>]

In [39]:
precision.reset_states()

In [42]:
class HuberMetric(keras.metrics.Metric):
    """Streaming mean of the Huber loss, tracked with two scalar weights.

    `total` accumulates the summed Huber values and `count` the number of
    elements seen; `result()` is their ratio.
    """
    def __init__(self, threshold=0.1, **kwargs):
        super().__init__(**kwargs)
        self.threshold = threshold
        self.huber_fn = create_huber(threshold)
        # Creation order (total, then count) fixes the order of `variables`.
        self.total = self.add_weight('total', initializer='zeros')
        self.count = self.add_weight('count', initializer='zeros')
    def update_state(self, y_true, y_pred, sample_weight=None):
        """Fold one batch into the running totals (`sample_weight` is unused)."""
        per_element = self.huber_fn(y_true, y_pred)
        batch_total = tf.reduce_sum(per_element)
        n_elements = tf.cast(tf.size(y_true), tf.float32)
        self.total.assign_add(batch_total)
        self.count.assign_add(n_elements)
    def result(self):
        """Return the mean Huber value over everything seen so far."""
        return self.total / self.count
    def get_config(self):
        """Extend the base configuration with the threshold."""
        config = dict(super().get_config())
        config['threshold'] = self.threshold
        return config

In [43]:
m = HuberMetric(2.)
m(tf.constant([[2.]]), tf.constant([[10.]]))

<tf.Tensor: shape=(), dtype=float32, numpy=14.0>

In [44]:
m(tf.constant([[0.], [5.]]), tf.constant([[1.], [9.25]]))
m.result()

<tf.Tensor: shape=(), dtype=float32, numpy=7.0>

In [45]:
m.variables

[<tf.Variable 'total:0' shape=() dtype=float32, numpy=21.0>,
 <tf.Variable 'count:0' shape=() dtype=float32, numpy=3.0>]

In [46]:
keras.backend.clear_session()
np.random.seed(42)
tf.random.set_seed(42)

In [47]:
model = keras.models.Sequential([
    keras.layers.Dense(30, activation="selu", kernel_initializer="lecun_normal",
                       input_shape=input_shape),
    keras.layers.Dense(1),
])

In [51]:
model.compile(loss=create_huber(2.0), optimizer='nadam')

In [52]:
model.fit(x_train_scaled, y_train, epochs=2)

Epoch 1/2
Epoch 2/2


<tensorflow.python.keras.callbacks.History at 0x2a0fff8b0c8>

In [53]:
model.metrics

[<tensorflow.python.keras.metrics.Mean at 0x2a0ffe10bc8>]

In [54]:
class HuberMetric(keras.metrics.Mean):
    """Mean Huber loss implemented on top of `keras.metrics.Mean`.

    The per-element Huber values are handed to the parent class together
    with `sample_weight`.
    """
    def __init__(self, threshold=1.0, name='HuberMetric', dtype=None):
        self.threshold = threshold
        self.huber_fn = create_huber(threshold)
        super().__init__(name=name, dtype=dtype)
    def update_state(self, y_true, y_pred, sample_weight=None):
        """Compute Huber values for the batch and delegate to `Mean`."""
        huber_values = self.huber_fn(y_true, y_pred)
        super().update_state(huber_values, sample_weight)
    def get_config(self):
        """Extend the base configuration with the threshold."""
        config = dict(super().get_config())
        config["threshold"] = self.threshold
        return config

In [55]:
keras.backend.clear_session()
np.random.seed(42)
tf.random.set_seed(42)

In [56]:
model = keras.models.Sequential([
    keras.layers.Dense(30, activation="selu", kernel_initializer="lecun_normal",
                       input_shape=input_shape),
    keras.layers.Dense(1),
])

In [57]:
model.compile(loss=keras.losses.Huber(2.0), optimizer="nadam", weighted_metrics=[HuberMetric(2.0)])

## 사용자 정의 층

In [5]:
exponential_layer = keras.layers.Lambda(lambda x: tf.exp(x))

In [17]:
class MyDense(keras.layers.Layer):
    """Minimal fully connected layer: ``activation(x @ kernel + bias)``."""
    def __init__(self, units, activation=None, **kwargs):
        super().__init__(**kwargs)
        self.units = units
        # Resolve a name such as 'relu' (or None) to a callable.
        self.activation = keras.activations.get(activation)

    def build(self, batch_input_shape):
        """Create the kernel and bias once the input width is known."""
        n_inputs = batch_input_shape[-1]
        self.kernel = self.add_weight(
            name='kernel',
            shape=[n_inputs, self.units],
            initializer='glorot_normal')
        self.bias = self.add_weight(
            name='bias',
            shape=[self.units],
            initializer='zeros')
        super().build(batch_input_shape)
    def call(self, x):
        """Apply the affine transform followed by the activation."""
        pre_activation = x @ self.kernel + self.bias
        return self.activation(pre_activation)
    def compute_output_shape(self, batch_input_shape):
        """Replace the last input dimension with `units`."""
        leading_dims = batch_input_shape.as_list()[:-1]
        return tf.TensorShape(leading_dims + [self.units])
    def get_config(self):
        """Return the base configuration plus `units` and the activation."""
        config = dict(super().get_config())
        config['units'] = self.units
        config['activation'] = keras.activations.serialize(self.activation)
        return config

In [9]:
a = MyDense(units=10, activation='relu')

In [10]:
a.get_config()

{'name': 'my_dense_1',
 'trainable': True,
 'dtype': 'float32',
 'units': 10,
 'activation': 'relu'}

In [11]:
keras.backend.clear_session()
np.random.seed(42)
tf.random.set_seed(42)

In [18]:
input_shape = x_train_scaled.shape[1:]
model = keras.models.Sequential([
    MyDense(units=30, activation='relu', input_shape=input_shape),
    MyDense(units=1)
])

In [19]:
model.compile(loss='mse', optimizer='nadam')
model.fit(x_train_scaled, y_train, epochs=2,
         validation_data=(x_val_scaled, y_val))
model.evaluate(x_test_scaled, y_test)

Epoch 1/2
Epoch 2/2


0.5473727583885193

In [21]:
model_path = 'model/my_model_with_a_custom_layer.h5'
model.save(model_path)

In [23]:
model = keras.models.load_model(model_path, 
                               custom_objects={'MyDense': MyDense})

In [27]:
model.layers[1].name

'my_dense_7'

In [28]:
b = MyDense(units=10)

In [29]:
b.name

'my_dense_8'

In [30]:
class MyMultiLayer(keras.layers.Layer):
    """Layer taking a pair of inputs and returning their sum and product."""
    def call(self, x):
        """Unpack the two inputs and return ``(sum, product)``."""
        first, second = x
        total = first + second
        product = first * second
        return total, product
    def compute_output_shape(self, batch_input_shape):
        """Return the two input shapes as the two output shapes."""
        shape_first, shape_second = batch_input_shape
        return [shape_first, shape_second]

In [31]:
keras.backend.clear_session()
np.random.seed(42)
tf.random.set_seed(42)

In [32]:
inputs1 = keras.layers.Input(shape=[2])
inputs2 = keras.layers.Input(shape=[2])
outputs1, outputs2 = MyMultiLayer()((inputs1, inputs2))

In [33]:
class AddGaussianNoise(keras.layers.Layer):
    """Adds zero-mean Gaussian noise to its input when `training` is truthy."""
    def __init__(self, stddev, **kwargs):
        super().__init__(**kwargs)
        self.stddev = stddev
    def call(self, x, training=None):
        """Return `x` plus noise when training, otherwise `x` unchanged."""
        if not training:
            return x
        return x + tf.random.normal(tf.shape(x), stddev=self.stddev)
    def compute_output_shape(self, batch_input_shape):
        """The output shape equals the input shape."""
        return batch_input_shape

In [34]:
x_new_scaled = x_test_scaled

In [35]:
class ResidualBlock(keras.layers.Layer):
    """Stack of ELU dense layers whose output is added back to the input."""
    def __init__(self, n_layers, n_neurons, **kwargs):
        super().__init__(**kwargs)
        dense_stack = []
        for _ in range(n_layers):
            dense_stack.append(
                keras.layers.Dense(units=n_neurons, activation='elu',
                                   kernel_initializer='he_normal'))
        self.hidden = dense_stack
    def call(self, inputs):
        """Run the input through every hidden layer, then add the skip path."""
        activations = inputs
        for dense in self.hidden:
            activations = dense(activations)
        return inputs + activations

In [36]:
class ResidualRegressor(keras.models.Model):
    """Regressor built from one dense layer and two residual blocks.

    `block1` is applied four times in a row (the same layer object, so its
    weights are reused), followed by a single pass through `block2`.
    """
    def __init__(self, output_dim, **kwargs):
        super().__init__(**kwargs)
        self.hidden1 = keras.layers.Dense(
            30, activation='elu', kernel_initializer='he_normal')
        self.block1 = ResidualBlock(n_layers=2, n_neurons=30)
        self.block2 = ResidualBlock(n_layers=2, n_neurons=30)
        self.out = keras.layers.Dense(output_dim)
    def call(self, inputs):
        """Forward pass: dense layer, 4x block1, block2, output layer."""
        features = self.hidden1(inputs)
        for _ in range(4):
            features = self.block1(features)
        features = self.block2(features)
        return self.out(features)

In [37]:
keras.backend.clear_session()
np.random.seed(42)
tf.random.set_seed(42)

In [38]:
model = ResidualRegressor(1)
model.compile(loss='mse', optimizer='nadam')
history = model.fit(x_train_scaled, y_train, epochs=5)
score = model.evaluate(x_test_scaled, y_test)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [39]:
model_path = 'model/my_custom_model.ckpt'
model.save(model_path)

Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.
Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.
INFO:tensorflow:Assets written to: model/my_custom_model.ckpt\assets


In [40]:
model = keras.models.load_model(model_path)

In [41]:
history = model.fit(x_train_scaled, y_train, epochs=5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [45]:
class ReconstructingRegressor(keras.models.Model):
    """Regressor with an auxiliary input-reconstruction loss.

    On top of the five hidden layers there are two heads: `out` produces the
    prediction and `reconstruct` tries to rebuild the inputs. The scaled mean
    squared reconstruction error is stored on `self.recon_loss` during `call`
    and added to the compiled loss in `train_step`.
    """
    def __init__(self, output_dim, **kwargs):
        super().__init__(**kwargs)
        self.hidden = [keras.layers.Dense(30, activation='selu',
                                         kernel_initializer='lecun_normal')
                      for _ in range(5)]
        self.out = keras.layers.Dense(output_dim)
        # The reconstruction is subtracted from the inputs in `call`, so its
        # width (8) has to match the number of input features.
        self.reconstruct = keras.layers.Dense(8)
        self.reconstruction_mean = keras.metrics.Mean(name='reconstruction_error')
    def call(self, inputs, training=None):
        """Return predictions and refresh `self.recon_loss` as a side effect."""
        z = inputs
        for layer in self.hidden:
            z = layer(z)
        reconstruction = self.reconstruct(z)
        self.recon_loss = 0.05 * tf.reduce_mean(tf.square(reconstruction - inputs))

        if training:
            # Fixed: this used the undefined local name `recon_loss`.
            result = self.reconstruction_mean(self.recon_loss)
            self.add_metric(result)
        return self.out(z)
    def train_step(self, data):
        """Run one optimisation step on a batch `(x, y)`.

        Returns:
            A dict mapping each metric name to its current value.
        """
        x, y = data

        with tf.GradientTape() as tape:
            # Run the forward pass in training mode so the `if training:`
            # branch of `call` executes and the reconstruction metric updates.
            y_pred = self(x, training=True)
            # The reconstruction term is added to the main loss here.
            loss = self.compiled_loss(y, y_pred,
                                     regularization_losses=[self.recon_loss])
        gradients = tape.gradient(loss, self.trainable_variables)
        self.optimizer.apply_gradients(zip(gradients, self.trainable_variables))
        return {m.name: m.result() for m in self.metrics}

In [46]:
keras.backend.clear_session()
np.random.seed(42)
tf.random.set_seed(42)

In [47]:
model = ReconstructingRegressor(1)
model.compile(loss='mse', optimizer='nadam')
history = model.fit(x_train_scaled, y_train, epochs=2)
y_pred = model.predict(x_test_scaled)

Epoch 1/2
Epoch 2/2


## 그래디언트 계산

In [50]:
def f(w1, w2):
    """Evaluate ``3 * w1**2 + 2 * w1 * w2``."""
    quadratic_term = 3 * w1 ** 2
    cross_term = 2 * w1 * w2
    return quadratic_term + cross_term

In [51]:
w1, w2 = 5, 3
eps = 1e-6
(f(w1 + eps, w2) - f(w1, w2)) / eps

36.000003007075065

In [52]:
(f(w1, w2 + eps) - f(w1, w2)) / eps

10.000000003174137

In [53]:
w1, w2 = tf.Variable(5.), tf.Variable(3.)
with tf.GradientTape() as tape:
    z = f(w1, w2)
gradients = tape.gradient(z, [w1, w2])

In [54]:
gradients

[<tf.Tensor: shape=(), dtype=float32, numpy=36.0>,
 <tf.Tensor: shape=(), dtype=float32, numpy=10.0>]

In [55]:
with tf.GradientTape() as tape:
    z = f(w1, w2)

dz_dw1 = tape.gradient(z, w1)

In [56]:
dz_dw1

<tf.Tensor: shape=(), dtype=float32, numpy=36.0>

In [57]:
dz_dw2 = tape.gradient(z, w2)

RuntimeError: GradientTape.gradient can only be called once on non-persistent tapes.

In [58]:
with tf.GradientTape(persistent=True) as tape:
    z = f(w1, w2)
    
dz_dw1 = tape.gradient(z, w1)
dz_dw2 = tape.gradient(z, w2)
del tape

In [59]:
dz_dw1, dz_dw2

(<tf.Tensor: shape=(), dtype=float32, numpy=36.0>,
 <tf.Tensor: shape=(), dtype=float32, numpy=10.0>)

In [60]:
c1, c2 = tf.constant(5.), tf.constant(3.)
with tf.GradientTape() as tape:
    z = f(c1, c2)
    
gradients = tape.gradient(z, [c1, c2])

In [61]:
gradients

[None, None]

In [62]:
with tf.GradientTape() as tape:
    tape.watch(c1)
    tape.watch(c2)
    z = f(c1, c2)
    
gradients = tape.gradient(z, [c1, c2])

In [63]:
gradients

[<tf.Tensor: shape=(), dtype=float32, numpy=36.0>,
 <tf.Tensor: shape=(), dtype=float32, numpy=10.0>]

In [64]:
with tf.GradientTape() as tape:
    z1 = f(w1, w2 + 2.)
    z2 = f(w1, w2 + 5.)
    z3 = f(w1, w2 + 7.)

tape.gradient([z1, z2, z3], [w1, w2])

[<tf.Tensor: shape=(), dtype=float32, numpy=136.0>,
 <tf.Tensor: shape=(), dtype=float32, numpy=30.0>]

In [65]:
with tf.GradientTape(persistent=True) as tape:
    z1 = f(w1, w2 + 2.)
    z2 = f(w1, w2 + 5.)
    z3 = f(w1, w2 + 7.)

tf.reduce_sum(tf.stack([tape.gradient(z, [w1, w2]) for z in (z1, z2, z3)]), axis=0)

<tf.Tensor: shape=(2,), dtype=float32, numpy=array([136.,  30.], dtype=float32)>

In [66]:
del tape

In [67]:
with tf.GradientTape(persistent=True) as hessian_tape:
    with tf.GradientTape() as jacobian_tape:
        z = f(w1, w2)
    jacobians = jacobian_tape.gradient(z, [w1, w2])
hessians = [hessian_tape.gradient(jacobian, [w1, w2]) for jacobian in jacobians]
del hessian_tape

In [68]:
hessians

[[<tf.Tensor: shape=(), dtype=float32, numpy=6.0>,
  <tf.Tensor: shape=(), dtype=float32, numpy=2.0>],
 [<tf.Tensor: shape=(), dtype=float32, numpy=2.0>, None]]

In [69]:
jacobians

[<tf.Tensor: shape=(), dtype=float32, numpy=36.0>,
 <tf.Tensor: shape=(), dtype=float32, numpy=10.0>]

In [70]:
def f(w1, w2):
    """Same polynomial as before, with the cross term wrapped in stop_gradient.

    The cross term ``2 * w1 * w2`` still contributes to the value, but it is
    passed through `tf.stop_gradient`.
    """
    quadratic_term = 3 * w1 ** 2
    frozen_cross_term = tf.stop_gradient(2 * w1 * w2)
    return quadratic_term + frozen_cross_term

with tf.GradientTape() as tape:
    z = f(w1, w2)

tape.gradient(z, [w1, w2])

[<tf.Tensor: shape=(), dtype=float32, numpy=30.0>, None]

In [71]:
def my_softplus(z):
    """Naive softplus, ``log(exp(z) + 1)``.

    Kept in this direct form on purpose: the following cell differentiates it
    at z = 100 and the recorded gradient is nan.
    """
    exp_z = tf.exp(z)
    return tf.math.log(exp_z + 1.0)

In [74]:
x = tf.Variable(100.)
with tf.GradientTape() as tape:
    z = my_softplus(x)

tape.gradient(z, x)

<tf.Tensor: shape=(), dtype=float32, numpy=nan>

In [73]:
import math
math.log(math.exp(1))

1.0

## 사용자 정의 훈련

In [23]:
keras.backend.clear_session()
np.random.seed(42)
tf.random.set_seed(42)

In [24]:
l2_reg = keras.regularizers.l2(0.05)
model = keras.models.Sequential([
    keras.layers.Dense(30, activation='elu', kernel_initializer='he_normal',
                      kernel_regularizer=l2_reg),
    keras.layers.Dense(1, kernel_regularizer=l2_reg)
])

In [25]:
def random_batch(x, y, batch_size=32):
    """Draw `batch_size` random rows (with replacement) from `x` and `y`.

    The same random indices are used for both arrays so that samples and
    targets stay aligned.
    """
    n_samples = len(x)
    picks = np.random.randint(n_samples, size=batch_size)
    batch_x = x[picks]
    batch_y = y[picks]
    return batch_x, batch_y

In [26]:
a = np.random.randint(10, size=3)

In [27]:
a

array([6, 3, 7])

In [28]:
def print_status_bar(iteration, total, loss, metrics=None):
    """Print a one-line progress report that overwrites itself.

    Each metric object (the loss first) is rendered as ``name: value`` using
    its `name` attribute and `result()` method. The line starts with a
    carriage return and only ends with a newline once `iteration` reaches
    `total`.
    """
    tracked = [loss] + (metrics or [])
    rendered = []
    for m in tracked:
        rendered.append("{}: {:.4f}".format(m.name, m.result()))
    summary = " - ".join(rendered)
    if iteration < total:
        terminator = ""
    else:
        terminator = "\n"
    print("\r{}/{} - ".format(iteration, total) + summary, end=terminator)

In [29]:
# Hyper-parameters and bookkeeping objects for the custom training loop.
n_epochs = 5
batch_size = 32
n_steps = len(x_train) // batch_size
# `learning_rate` is the supported argument name; `lr` is a deprecated alias.
optimizer = keras.optimizers.Nadam(learning_rate=0.01)
loss_fn = keras.losses.mean_squared_error
# Running mean of the total loss, plus the extra metrics to display.
mean_loss = keras.metrics.Mean()
metrics = [keras.metrics.MeanAbsoluteError()]

In [31]:
# Custom training loop: for each epoch, run `n_steps` gradient updates on
# randomly sampled mini-batches and print a self-overwriting status line.
# `count` tallies how many times a variable constraint was applied.
count = 0
for epoch in range(1, n_epochs + 1):
    print("Epoch {}/{}".format(epoch, n_epochs))
    for step in range(1, n_steps + 1):
        # NOTE(review): random_batch is called with its default batch size
        # (32), which happens to equal `batch_size`; changing `batch_size`
        # alone would not change the sampled batch size.
        x_batch, y_batch = random_batch(x_train_scaled, y_train)
        with tf.GradientTape() as tape:
            y_pred = model(x_batch)
            # Mean of the per-sample losses for this batch.
            main_loss = tf.reduce_mean(loss_fn(y_batch, y_pred))
            # Total loss = main loss + every entry of model.losses
            # (presumably the L2 kernel-regularisation terms of the layers).
            loss = tf.add_n([main_loss] + model.losses)
        gradients = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))
        # Re-apply any weight constraint after the optimizer step.
        for variable in model.variables:
            if variable.constraint is not None:
                variable.assign(variable.constraint(variable))
                count += 1
        # Update the running loss and the extra metrics with this batch.
        mean_loss(loss)
        for metric in metrics:
            metric(y_batch, y_pred)
        print_status_bar(step * batch_size, len(y_train), mean_loss, metrics)
    # Final call with iteration == total so the status line ends with a newline.
    print_status_bar(len(y_train), len(y_train), mean_loss, metrics)
    # Start every epoch with fresh metric state.
    for metric in [mean_loss] + metrics:
        metric.reset_states()
# The recorded output is 0, i.e. no variable had a constraint in that run.
print(count)

Epoch 1/5
11610/11610 - mean: 0.6510 - mean_absolute_error: 0.5227
Epoch 2/5
11610/11610 - mean: 0.6301 - mean_absolute_error: 0.5122
Epoch 3/5
11610/11610 - mean: 0.6260 - mean_absolute_error: 0.5097
Epoch 4/5
11610/11610 - mean: 0.6123 - mean_absolute_error: 0.5053
Epoch 5/5
11610/11610 - mean: 0.6370 - mean_absolute_error: 0.5133
0


In [28]:
def cube(x):
    """Return `x` raised to the third power."""
    cubed = x ** 3
    return cubed

In [29]:
cube(2)

8

In [30]:
cube(tf.constant(2.0))

<tf.Tensor: shape=(), dtype=float32, numpy=8.0>

In [31]:
tf_cube = tf.function(cube)
tf_cube

<tensorflow.python.eager.def_function.Function at 0x23de01e6a88>

In [32]:
tf_cube(2)

<tf.Tensor: shape=(), dtype=int32, numpy=8>

In [33]:
tf_cube(tf.constant(2.0))

<tf.Tensor: shape=(), dtype=float32, numpy=8.0>

In [35]:
@tf.function
def tf_cube(x):
    """`tf.function`-decorated version of the cube function."""
    cubed = x ** 3
    return cubed

In [36]:
tf_cube

<tensorflow.python.eager.def_function.Function at 0x23de0284488>

In [37]:
cube

<function __main__.cube(x)>

In [38]:
import time
start = time.time()
cube(2)
time.time() - start

0.0

In [39]:
@tf.function
def f(x):
    """Return NumPy-generated random values with the shape of `x`.

    The recorded outputs of the following cells show the same value being
    returned for two different scalar inputs.
    """
    dims = x.shape
    return np.random.rand(*dims)

In [40]:
f(tf.constant(2.))

<tf.Tensor: shape=(), dtype=float32, numpy=0.41544533>

In [41]:
f(tf.constant(3.))

<tf.Tensor: shape=(), dtype=float32, numpy=0.41544533>

In [42]:
f(tf.constant([2., 3.]))

<tf.Tensor: shape=(2,), dtype=float64, numpy=array([0.15537049, 0.68338269])>

In [43]:
@tf.function
def f(x):
    """Return TensorFlow-generated uniform random values shaped like `x`."""
    dims = x.shape
    return tf.random.uniform(dims)

In [44]:
f(tf.constant(2.))

<tf.Tensor: shape=(), dtype=float32, numpy=0.8354591>

In [45]:
f(tf.constant(3.))

<tf.Tensor: shape=(), dtype=float32, numpy=0.46399975>

# 연습문제 12. 층 정규화를 수행하는 사용자 정의 층 구현

## a. 

In [51]:
class LayerNormalization(keras.layers.Layer):
    """Custom layer normalisation over the last axis.

    Each sample is standardised with the mean and standard deviation computed
    along its last axis, then scaled by `alpha` and shifted by `beta`:
    ``alpha * (x - mean) / (std + epsilon) + beta``.

    Args:
        epsilon: Small constant added to the standard deviation to avoid
            division by zero.
    """
    def __init__(self, epsilon=1e-4, **kwargs):
        super().__init__(**kwargs)
        self.epsilon = epsilon

    def build(self, input_shape):
        """Create the scale (`alpha`) and offset (`beta`) weights.

        Both have one entry per feature of the last axis. `alpha` is created
        first so that `get_weights()`/`set_weights()` use the order
        ``[alpha, beta]``.
        """
        feature_shape = input_shape[-1:]
        self.alpha = self.add_weight(name='alpha', shape=feature_shape,
                                     initializer='ones')
        self.beta = self.add_weight(name='beta', shape=feature_shape,
                                    initializer='zeros')
        super().build(input_shape)

    def call(self, inputs):
        """Normalise `inputs` along the last axis, then scale and shift."""
        mean, var = tf.nn.moments(inputs, axes=-1, keepdims=True)
        normalized = (inputs - mean) / (tf.sqrt(var) + self.epsilon)
        return self.alpha * normalized + self.beta

    def get_config(self):
        """Return the base configuration plus `epsilon`."""
        base_config = super().get_config()
        return {**base_config, 'epsilon': self.epsilon}

In [52]:
x = x_train.astype(np.float32)

custom_layer_norm = LayerNormalization()
keras_layer_norm = keras.layers.LayerNormalization()

tf.reduce_mean(keras.losses.mean_absolute_error(keras_layer_norm(x), 
                                                custom_layer_norm(x)))

<tf.Tensor: shape=(), dtype=float32, numpy=1.9283716e-07>

In [55]:
random_alpha = np.random.rand(x.shape[-1])
random_beta = np.random.rand(x.shape[-1])

custom_layer_norm.set_weights([random_alpha, random_beta])
keras_layer_norm.set_weights([random_alpha, random_beta])

tf.reduce_mean(keras.losses.mean_absolute_error(
    keras_layer_norm(x), custom_layer_norm(x)))

<tf.Tensor: shape=(), dtype=float32, numpy=7.14914e-08>

In [60]:
custom_layer_norm.weights

[<tf.Variable 'layer_normalization/Variable:0' shape=(8,) dtype=float32, numpy=
 array([0.79690266, 0.04737473, 0.98719597, 0.6970269 , 0.32140198,
        0.1512938 , 0.39253142, 0.22056566], dtype=float32)>,
 <tf.Variable 'layer_normalization/Variable:0' shape=(8,) dtype=float32, numpy=
 array([0.49415323, 0.7818927 , 0.6601304 , 0.4359349 , 0.11894392,
        0.9571569 , 0.2703705 , 0.27783453], dtype=float32)>]

In [59]:
keras_layer_norm.weights

[<tf.Variable 'layer_normalization_1/gamma:0' shape=(8,) dtype=float32, numpy=
 array([0.79690266, 0.04737473, 0.98719597, 0.6970269 , 0.32140198,
        0.1512938 , 0.39253142, 0.22056566], dtype=float32)>,
 <tf.Variable 'layer_normalization_1/beta:0' shape=(8,) dtype=float32, numpy=
 array([0.49415323, 0.7818927 , 0.6601304 , 0.4359349 , 0.11894392,
        0.9571569 , 0.2703705 , 0.27783453], dtype=float32)>]

# 연습문제 13. 사용자 정의 훈련 반복을 통해 패션 MNIST 학습

In [32]:
import tensorflow as tf
from tensorflow import keras
import numpy as np

In [33]:
keras.backend.clear_session()
tf.random.set_seed(42)
np.random.seed(42)

In [34]:
fashion_mnist = keras.datasets.fashion_mnist.load_data()

In [35]:
(x_train_full, y_train_full), (x_test, y_test) = fashion_mnist

In [36]:
x_train_full.shape

(60000, 28, 28)

In [37]:
y_train_full.shape

(60000,)

In [38]:
x_train, y_train = x_train_full[:50000] / 255., y_train_full[:50000]
x_val, y_val = x_train_full[50000:] / 255., y_train_full[50000:]
x_test = x_test / 255.

In [39]:
x_train.shape, y_train.shape, x_val.shape, y_val.shape

((50000, 28, 28), (50000,), (10000, 28, 28), (10000,))

In [42]:
y_pred = model(x_val)

In [49]:
loss_fn = keras.losses.sparse_categorical_crossentropy
loss_fn(y_val, y_pred).shape

TensorShape([10000])

In [41]:
model = keras.models.Sequential([
    keras.layers.Flatten(input_shape=x_train.shape[1:]),
    keras.layers.Dense(units=300, activation='selu', 
                       kernel_initializer='lecun_normal'),
    keras.layers.Dense(units=300, activation='selu', 
                       kernel_initializer='lecun_normal'),
    keras.layers.Dense(units=10, activation='softmax', 
                       kernel_initializer='lecun_normal')
])

In [10]:
def random_batch(x, y, batch_size=32):
    """Sample `batch_size` aligned rows (with replacement) from `x` and `y`."""
    n_samples = len(x)
    picks = np.random.randint(n_samples, size=batch_size)
    batch_x = x[picks]
    batch_y = y[picks]
    return batch_x, batch_y

In [11]:
def print_status_bar(iteration, total, loss, metrics=None, val_metrics=None):
    """Print a self-overwriting progress line with optional validation metrics.

    Every metric object is rendered as ``name: value`` from its `name`
    attribute and `result()` method; all entries are joined with ``" - "``.

    Args:
        iteration: Number of samples processed so far.
        total: Total number of samples; once `iteration` reaches it the line
            is terminated with a newline.
        loss: Metric object tracking the training loss (printed first).
        metrics: Optional list of additional training metrics.
        val_metrics: Optional validation metric, or list of validation
            metrics, appended after the training metrics.
    """
    def _render(metric_objects):
        """Format each metric object as 'name: value'."""
        return ["{}: {:.4f}".format(m.name, m.result()) for m in metric_objects]

    # Accept a single metric object as well as a list of them.
    if val_metrics is not None and not isinstance(val_metrics, (list, tuple)):
        val_metrics = [val_metrics]

    # Training entries first; the training loss is not repeated in the
    # validation part, and both parts share the same " - " separator.
    entries = _render([loss] + list(metrics or [])) + _render(val_metrics or [])
    end = "" if iteration < total else "\n"
    print("\r{}/{} - ".format(iteration, total) + " - ".join(entries), end=end)

In [45]:
# Hyper-parameters and bookkeeping objects for the Fashion-MNIST training loop.
n_epochs = 10
batch_size = 32
n_steps = len(x_train) // batch_size
optimizer = keras.optimizers.Nadam()
loss_fn = keras.losses.SparseCategoricalCrossentropy()
mean_loss = keras.metrics.Mean(name='loss')
# Labels are integer class ids and the model outputs softmax probabilities,
# so accuracy must compare the label with the arg-max of the prediction.
# Plain `Accuracy` would test the raw values for equality instead.
metrics = [keras.metrics.SparseCategoricalAccuracy(name='acc')]
mean_loss_val = keras.metrics.Mean(name='val_loss')
metrics_val = keras.metrics.SparseCategoricalAccuracy(name='val_acc')
# n_steps_val = len(x_val) // batch_size

In [13]:
# Sanity-check one forward pass: sample a mini-batch from the training set,
# run the model on it and evaluate the loss against the matching labels.
x_batch, y_batch = random_batch(x_train, y_train)
y_pred = model(x_batch)
loss = loss_fn(y_batch, y_pred)