In [8]:

!pip install -q keras-tuner

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, regularizers, constraints, initializers, callbacks
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split


def load_data(val_size=0.2, seed=42):
    """Load Fashion-MNIST, normalise it, and split off a validation set.

    Images are converted to float32, divided by 255 and given a trailing
    channel axis.

    Args:
        val_size: Passed to ``train_test_split`` as ``test_size``; the share of
            the training split held out for validation. Defaults to 0.2.
        seed: ``random_state`` for the split, so it is reproducible.
            Defaults to 42.

    Returns:
        A 2-tuple ``(split, (x_test, y_test))`` where ``split`` is the sequence
        ``[x_train, x_val, y_train, y_val]`` produced by ``train_test_split``.
    """
    (x_train, y_train), (x_test, y_test) = keras.datasets.fashion_mnist.load_data()

    def _prepare(images):
        # Scale raw pixel values by 1/255 and append the channel axis.
        return np.expand_dims(images.astype("float32") / 255.0, -1)

    x_train, x_test = _prepare(x_train), _prepare(x_test)
    split = train_test_split(x_train, y_train, test_size=val_size, random_state=seed)
    return split, (x_test, y_test)

# Unpack the train/validation split and the separate test set returned by load_data().
(x_train, x_val, y_train, y_val), (x_test, y_test) = load_data()


In [13]:

class CustomDropout(layers.Layer):
    """Dropout layer that is only active when ``training`` is truthy.

    Args:
        rate: Fraction of input units to drop; must satisfy ``0 <= rate < 1``.
        **kwargs: Standard ``Layer`` keyword arguments (e.g. ``name``).

    Raises:
        ValueError: If ``rate`` is outside ``[0, 1)``.
    """

    def __init__(self, rate, **kwargs):
        super().__init__(**kwargs)
        # Fail at construction time instead of on the first training step.
        if not 0.0 <= rate < 1.0:
            raise ValueError(f"rate must be in [0, 1), got {rate!r}")
        self.rate = rate

    def call(self, inputs, training=False):
        """Apply dropout to ``inputs`` when training, otherwise pass them through."""
        if training:
            return tf.nn.dropout(inputs, rate=self.rate)
        return inputs

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created."""
        config = super().get_config()
        config.update({"rate": self.rate})
        return config

class MaxNormDense(layers.Layer):
    """Linear layer that norm-clips its kernel along axis 0 on every forward pass.

    The stored kernel is not modified; only the copy used in the matmul is
    clipped (to a norm of at most 1.0).
    """

    def __init__(self, units):
        super().__init__()
        self.units = units

    def build(self, input_shape):
        """Create the kernel and bias once the input feature size is known."""
        in_features = input_shape[-1]
        self.kernel = self.add_weight(
            name="kernel",
            shape=[in_features, self.units],
            initializer="random_normal",
        )
        self.bias = self.add_weight(
            name="bias",
            shape=[self.units],
            initializer="zeros",
        )

    def call(self, inputs):
        """Return ``inputs @ clip(kernel) + bias``."""
        clipped_kernel = tf.clip_by_norm(self.kernel, clip_norm=1.0, axes=[0])
        projected = tf.matmul(inputs, clipped_kernel)
        return projected + self.bias



In [14]:

class OneCycleScheduler(callbacks.Callback):
    """Linearly decay the learning rate from ``max_lr`` to 0 over ``total_steps``.

    Note: despite the name there is no warm-up phase; the rate only decreases.

    Args:
        max_lr: Learning rate used at optimizer step 0.
        total_steps: Number of optimizer steps over which the rate reaches 0.

    Raises:
        ValueError: If ``total_steps`` is not positive.
    """

    def __init__(self, max_lr, total_steps):
        super().__init__()
        if total_steps <= 0:
            raise ValueError(f"total_steps must be positive, got {total_steps!r}")
        self.max_lr = max_lr
        self.total_steps = total_steps

    def on_train_batch_begin(self, batch, logs=None):
        """Write the learning rate for the upcoming batch into the optimizer."""
        pct = self.model.optimizer.iterations / self.total_steps
        # Clamp so the rate never becomes negative if training runs past total_steps.
        pct = tf.clip_by_value(pct, 0.0, 1.0)
        lr = self.max_lr * (1 - pct)
        # Assigns into whatever `optimizer.learning_rate` returns, so that object
        # must be an assignable variable.
        keras.backend.set_value(self.model.optimizer.learning_rate, lr)


# TensorBoard callback writing its event files under `log_dir`.
log_dir = "logs/custom_keras"
tb_cb = callbacks.TensorBoard(log_dir=log_dir)

class CustomHuber(keras.losses.Loss):
    """Huber loss: quadratic where ``|error| <= delta``, linear elsewhere.

    Args:
        delta: Threshold between the quadratic and linear regimes.
    """

    def __init__(self, delta=1.0):
        super().__init__()
        self.delta = delta

    def call(self, y_true, y_pred):
        """Return the element-wise Huber loss of ``y_true - y_pred``."""
        residual = y_true - y_pred
        magnitude = tf.abs(residual)
        quadratic = 0.5 * tf.square(residual)
        linear = self.delta * (magnitude - 0.5 * self.delta)
        return tf.where(magnitude <= self.delta, quadratic, linear)



In [17]:
class MyL1Regularizer(regularizers.Regularizer):
    """L1 penalty: ``strength * sum(|x|)``.

    Args:
        strength: Multiplier applied to the sum of absolute values.
    """

    def __init__(self, strength=0.01):
        self.strength = strength

    def __call__(self, x):
        """Return the scalar penalty for tensor ``x``."""
        return self.strength * tf.reduce_sum(tf.abs(x))

    def get_config(self):
        """Return the constructor arguments so models using this can be serialized."""
        return {"strength": float(self.strength)}

class MyGlorotInit(initializers.Initializer):
    """Glorot (Xavier) uniform initializer.

    Samples from ``U(-limit, limit)`` with ``limit = sqrt(6 / (fan_in + fan_out))``.
    For rank-2 shapes ``(fan_in, fan_out)`` the limit equals the sum-of-shape
    formula; for higher ranks the leading dimensions are treated as a receptive
    field multiplying both fans.
    """

    def __call__(self, shape, dtype=None):
        """Return a tensor of the given ``shape`` and ``dtype`` (float32 if None)."""
        if dtype is None:
            dtype = tf.float32

        dims = [int(d) for d in shape]
        if not dims:
            fan_in = fan_out = 1
        elif len(dims) == 1:
            fan_in = fan_out = dims[0]
        elif len(dims) == 2:
            fan_in, fan_out = dims
        else:
            receptive_field = 1
            for d in dims[:-2]:
                receptive_field *= d
            fan_in = dims[-2] * receptive_field
            fan_out = dims[-1] * receptive_field

        # Python-float limit so tf.random.uniform converts it to `dtype` itself;
        # max(1, ...) avoids a zero division for empty shapes.
        limit = (6.0 / max(1, fan_in + fan_out)) ** 0.5
        return tf.random.uniform(shape, -limit, limit, dtype=dtype)


def my_leaky_relu(x):
    """Leaky ReLU activation with a negative-side slope of 0.2."""
    negative_slope = 0.2
    return tf.nn.leaky_relu(x, alpha=negative_slope)

class MyPositiveWeights(constraints.Constraint):
    """Weight constraint that replaces negative entries with zero."""

    def __call__(self, w):
        """Return ``w`` passed through ReLU."""
        non_negative = tf.nn.relu(w)
        return non_negative

class CustomAccuracy(keras.metrics.Metric):
    """Streaming accuracy for integer labels against per-class scores.

    Keeps a running (optionally weighted) count of correct predictions and of
    all examples seen; ``result()`` is their ratio.
    """

    def __init__(self, name="custom_accuracy", **kwargs):
        super().__init__(name=name, **kwargs)
        self.total = self.add_weight(name="total", initializer="zeros")
        self.count = self.add_weight(name="count", initializer="zeros")

    def update_state(self, y_true, y_pred, sample_weight=None):
        """Accumulate matches between ``argmax(y_pred)`` and ``y_true``.

        Args:
            y_true: Integer class labels.
            y_pred: Per-class scores; the class axis is the last one.
            sample_weight: Optional per-example weights (or a single scalar).
        """
        # Flatten both sides so (batch, 1) labels cannot broadcast against
        # (batch,) predictions into a (batch, batch) comparison.
        labels = tf.reshape(tf.cast(y_true, tf.int64), [-1])
        predicted = tf.reshape(tf.argmax(y_pred, axis=-1), [-1])
        matches = tf.cast(tf.equal(predicted, labels), tf.float32)

        if sample_weight is None:
            weights = tf.ones_like(matches)
        else:
            weights = tf.reshape(tf.cast(sample_weight, tf.float32), [-1])
            weights = tf.broadcast_to(weights, tf.shape(matches))

        self.total.assign_add(tf.reduce_sum(matches * weights))
        self.count.assign_add(tf.reduce_sum(weights))

    def result(self):
        """Return accumulated accuracy, or 0 if nothing has been accumulated yet."""
        return tf.math.divide_no_nan(self.total, self.count)

# Classifier: flatten -> MaxNormDense(128) -> leaky ReLU -> dropout
#             -> Dense(64) with custom init/regularizer/constraint -> softmax over 10 classes.
model_layers = [
    layers.Input(shape=(28, 28, 1)),
    layers.Flatten(),
    MaxNormDense(128),
    layers.Activation(my_leaky_relu),
    CustomDropout(0.3),
    layers.Dense(
        64,
        activation=my_leaky_relu,
        kernel_initializer=MyGlorotInit(),
        kernel_regularizer=MyL1Regularizer(),
        kernel_constraint=MyPositiveWeights(),
    ),
    layers.Dense(10, activation="softmax"),
]
model = keras.Sequential(model_layers)


In [18]:
class MutableLearningRate(tf.keras.optimizers.schedules.LearningRateSchedule):
    """Schedule backed by a non-trainable variable that can be reassigned.

    ``__call__`` ignores the step and returns the variable itself, so assigning
    a new value to ``self.lr`` changes the learning rate the optimizer sees.

    Args:
        initial_lr: Initial value of the learning-rate variable.
    """

    def __init__(self, initial_lr):
        super().__init__()
        self.lr = tf.Variable(initial_lr, trainable=False, dtype=tf.float32)

    def __call__(self, step):
        """Return the backing variable; ``step`` is unused."""
        return self.lr

    def get_config(self):
        """Return constructor arguments (using the current rate) for serialization."""
        return {"initial_lr": float(self.lr.numpy())}

# Training configuration, defined once so fit() and the scheduler cannot drift apart.
EPOCHS = 10
BATCH_SIZE = 32
MAX_LR = 0.001

# One optimizer step per batch; ceil-divide so a final partial batch is counted.
steps_per_epoch = -(-len(x_train) // BATCH_SIZE)
total_steps = EPOCHS * steps_per_epoch

# The optimizer reads its rate from this schedule's variable, which
# OneCycleScheduler overwrites at the start of every batch.
mutable_lr = MutableLearningRate(MAX_LR)

model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=mutable_lr),
    loss=keras.losses.SparseCategoricalCrossentropy(),
    metrics=[CustomAccuracy()]
)


model.fit(
    x_train, y_train,
    validation_data=(x_val, y_val),
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    callbacks=[tb_cb, OneCycleScheduler(max_lr=MAX_LR, total_steps=total_steps)]
)

model.evaluate(x_test, y_test)


Epoch 1/10




[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 8ms/step - custom_accuracy: 0.6608 - loss: 2.0235 - val_custom_accuracy: 0.8073 - val_loss: 0.7807
Epoch 2/10
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 7ms/step - custom_accuracy: 0.7980 - loss: 0.7886 - val_custom_accuracy: 0.8185 - val_loss: 0.7095
Epoch 3/10
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 8ms/step - custom_accuracy: 0.8135 - loss: 0.7143 - val_custom_accuracy: 0.8278 - val_loss: 0.6577
Epoch 4/10
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 9ms/step - custom_accuracy: 0.8238 - loss: 0.6702 - val_custom_accuracy: 0.8360 - val_loss: 0.6231
Epoch 5/10
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 8ms/step - custom_accuracy: 0.8311 - loss: 0.6390 - val_custom_accuracy: 0.8351 - val_loss: 0.6094
Epoch 6/10
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 9ms/step - custom_accuracy: 0.8309 -

[0.5775592923164368, 0.8371000289916992]