In [9]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

In [10]:
# Loading the cifar dataset
cifar_data = tf.keras.datasets.cifar10.load_data()
cifar_scaled_train = cifar_data[0][0] / 255
cifar_scaled_test = cifar_data[1][0] / 255

In [11]:
# Splitting up the data\
x_train = cifar_scaled_train
y_train = cifar_data[0][1]
x_test = cifar_scaled_test
y_test = cifar_data[1][1]

# Splitting the validation and training data
x_val = x_train[40000:]
y_val = y_train[40000:]
x_train = x_train[:40000]
y_train = y_train[:40000]

In [None]:
# Making a regularizer
l2_reg = tf.keras.regularizers.l2(0.05)

In [None]:
# Here we create a custom model that does the same thing as above
class SeluModel(tf.keras.Model):
    
    def __init__(self, output_dim, **kwargs):
        super().__init__(**kwargs)
        self.hidden = [tf.keras.layers.Dense(100, activation="selu", 
        kernel_initializer="lecun_normal", kernel_regularizer=l2_reg) for _ in range(20)]
        self.out = tf.keras.layers.Dense(output_dim, activation="softmax")
        
    def build(self, batch_input_shape):
        # The first shape would be the batch size
        self.input_layer = tf.keras.layers.Flatten(input_shape=batch_input_shape[1:])
        super().build(batch_input_shape)
        
    def call(self, inputs):
        batch_of_pixel_grids = inputs
        # Here we should be handling the batch of pictures so no need to cut off to only the shape
        Z = self.input_layer(batch_of_pixel_grids)
        for layer in self.hidden:
            Z = layer(Z)
        return self.out(Z)

In [47]:
model = SeluModel(10)
model.build(cifar_scaled_train.shape)
model.summary()

Model: "selu_model_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_63 (Dense)             multiple                  307300    
_________________________________________________________________
dense_64 (Dense)             multiple                  10100     
_________________________________________________________________
dense_65 (Dense)             multiple                  10100     
_________________________________________________________________
dense_66 (Dense)             multiple                  10100     
_________________________________________________________________
dense_67 (Dense)             multiple                  10100     
_________________________________________________________________
dense_68 (Dense)             multiple                  10100     
_________________________________________________________________
dense_69 (Dense)             multiple                 

In [48]:
# Metrics and hyperparameters for the model
n_epochs = 5
batch_size = 32
n_steps = len(x_train) // batch_size
optimizer = tf.keras.optimizers.Nadam(learning_rate=0.01)
loss_fn = tf.keras.losses.sparse_categorical_crossentropy
mean_loss = tf.keras.metrics.Mean()
metrics = [tf.keras.metrics.MeanAbsoluteError()]

In [49]:
# Random Batching function to get shuffled batches on each interation
def random_batch(x, y, batch_size=32):
    idx = np.random.randint(len(x), size=batch_size)
    return x[idx], y[idx]

In [50]:
# Printing function
def print_status_bar(iteration, total, loss, metrics=None):
    metrics = " - ".join(["{}: {:.4f}".format(m.name, m.result())
                        for m in [loss] + (metrics or [])])
    end = "" if iteration < total else "\n"
    print("\r{}/{} - ".format(iteration, total) + metrics,
        end=end)

In [45]:
# Custom training loop (Shitty version)
for epoch in range(1, n_epochs + 1):
    print("Epoch {}/{}".format(epoch, n_epochs))
    for step in range(1, n_steps + 1):
        X_batch, y_batch = random_batch(x_train, y_train)
        with tf.GradientTape() as tape:
            y_pred = model3(X_batch, training=True)
            main_loss = tf.reduce_mean(loss_fn(y_batch, y_pred))
            loss = tf.add_n([main_loss] + model3.losses)
        gradients = tape.gradient(loss, model3.trainable_variables)
        optimizer.apply_gradients(zip(gradients,
model3.trainable_variables))
        mean_loss(loss)
        for metric in metrics:
            metric(y_batch, y_pred)
        print_status_bar(step * batch_size, len(y_train), mean_loss,
metrics)
    print_status_bar(len(y_train), len(y_train), mean_loss, metrics)
for metric in [mean_loss] + metrics:metric.reset_states()

Epoch 1/5
2048/40000 - mean: 2.5274 - mean_absolute_error: 4.3511

KeyboardInterrupt: 

In [52]:
# Custom Training loop, Aurelien's version (the correct version)
# Still don't really know if this one is all that good either, lets fix this up as an exercise
from tqdm import trange
from collections import OrderedDict
# Keeping track of the overall progress with tqdm
with trange(1, n_epochs + 1, desc="All epochs") as epochs:
    for epoch in epochs:
        with trange(1, n_steps + 1, desc="Epoch {}/{}".format(epoch, n_epochs)) as steps:
            for step in steps:
                X_batch, y_batch = random_batch(x_train, y_train)
                # Here we are tracking the mathematical operators for the
                # back-prop calculus with epsilon gradient approximations
                with tf.GradientTape() as tape:
                    y_pred = model(X_batch)
                    main_loss = tf.reduce_mean(loss_fn(y_batch, y_pred))
                    loss = tf.add_n([main_loss] + model.losses)
                gradients = tape.gradient(loss, model.trainable_variables)
                optimizer.apply_gradients(zip(gradients, model.trainable_variables))
                for variable in model.variables:
                    if variable.constraint is not None:
                        variable.assign(variable.constraint(variable))                    
                status = OrderedDict()
                mean_loss(loss)
                status["loss"] = mean_loss.result().numpy()
                for metric in metrics:
                    metric(y_batch, y_pred)
                    status[metric.name] = metric.result().numpy()
                steps.set_postfix(status)
            y_pred = model(x_val)
            status["val_loss"] = np.mean(loss_fn(y_val, y_pred))
            status["val_accuracy"] = np.mean(tf.keras.metrics.sparse_categorical_accuracy(
                tf.constant(y_val, dtype=np.float32), y_pred))
            steps.set_postfix(status)
        for metric in [mean_loss] + metrics:
            metric.reset_states()

All epochs:   0%|          | 0/5 [00:00<?, ?it/s]
Epoch 1/5:   0%|          | 0/1250 [00:00<?, ?it/s][A
Epoch 1/5:   0%|          | 0/1250 [00:00<?, ?it/s, loss=3.1e+3, mean_absolute_error=4.41][A
Epoch 1/5:   0%|          | 0/1250 [00:00<?, ?it/s, loss=3.1e+3, mean_absolute_error=4.41][A
Epoch 1/5:   0%|          | 2/1250 [00:00<01:24, 14.74it/s, loss=3.1e+3, mean_absolute_error=4.41][A
Epoch 1/5:   0%|          | 2/1250 [00:00<01:24, 14.74it/s, loss=3.1e+3, mean_absolute_error=4.41][A
Epoch 1/5:   0%|          | 2/1250 [00:00<01:24, 14.74it/s, loss=3.1e+3, mean_absolute_error=4.41][A
Epoch 1/5:   0%|          | 4/1250 [00:00<01:20, 15.44it/s, loss=3.1e+3, mean_absolute_error=4.41][A
Epoch 1/5:   0%|          | 4/1250 [00:00<01:20, 15.44it/s, loss=3.09e+3, mean_absolute_error=4.41][A
Epoch 1/5:   0%|          | 4/1250 [00:00<01:20, 15.44it/s, loss=3.09e+3, mean_absolute_error=4.41][A
Epoch 1/5:   0%|          | 6/1250 [00:00<01:19, 15.60it/s, loss=3.09e+3, mean_absolute_error

Epoch 1/5:   8%|▊         | 104/1250 [00:06<01:13, 15.66it/s, loss=2.87e+3, mean_absolute_error=4.41][A
Epoch 1/5:   8%|▊         | 104/1250 [00:06<01:13, 15.66it/s, loss=2.86e+3, mean_absolute_error=4.41][A
Epoch 1/5:   8%|▊         | 106/1250 [00:06<01:14, 15.29it/s, loss=2.86e+3, mean_absolute_error=4.41][A
Epoch 1/5:   8%|▊         | 106/1250 [00:07<01:14, 15.29it/s, loss=2.86e+3, mean_absolute_error=4.41][A
Epoch 1/5:   8%|▊         | 106/1250 [00:07<01:14, 15.29it/s, loss=2.86e+3, mean_absolute_error=4.41][A
Epoch 1/5:   9%|▊         | 108/1250 [00:07<01:14, 15.39it/s, loss=2.86e+3, mean_absolute_error=4.41][A
Epoch 1/5:   9%|▊         | 108/1250 [00:07<01:14, 15.39it/s, loss=2.86e+3, mean_absolute_error=4.41][A
Epoch 1/5:   9%|▊         | 108/1250 [00:07<01:14, 15.39it/s, loss=2.86e+3, mean_absolute_error=4.41][A
Epoch 1/5:   9%|▉         | 110/1250 [00:07<01:13, 15.59it/s, loss=2.86e+3, mean_absolute_error=4.41][A
Epoch 1/5:   9%|▉         | 110/1250 [00:07<01:13, 15.5

Epoch 1/5:  17%|█▋        | 208/1250 [00:13<01:08, 15.21it/s, loss=2.66e+3, mean_absolute_error=4.41][A
Epoch 1/5:  17%|█▋        | 208/1250 [00:13<01:08, 15.21it/s, loss=2.66e+3, mean_absolute_error=4.41][A
Epoch 1/5:  17%|█▋        | 210/1250 [00:13<01:07, 15.35it/s, loss=2.66e+3, mean_absolute_error=4.41][A
Epoch 1/5:  17%|█▋        | 210/1250 [00:13<01:07, 15.35it/s, loss=2.66e+3, mean_absolute_error=4.41][A
Epoch 1/5:  17%|█▋        | 210/1250 [00:13<01:07, 15.35it/s, loss=2.66e+3, mean_absolute_error=4.41][A
Epoch 1/5:  17%|█▋        | 212/1250 [00:13<01:07, 15.35it/s, loss=2.66e+3, mean_absolute_error=4.41][A
Epoch 1/5:  17%|█▋        | 212/1250 [00:13<01:07, 15.35it/s, loss=2.65e+3, mean_absolute_error=4.41][A
Epoch 1/5:  17%|█▋        | 212/1250 [00:14<01:07, 15.35it/s, loss=2.65e+3, mean_absolute_error=4.41][A
Epoch 1/5:  17%|█▋        | 214/1250 [00:14<01:08, 15.22it/s, loss=2.65e+3, mean_absolute_error=4.41][A
Epoch 1/5:  17%|█▋        | 214/1250 [00:14<01:08, 15.2

Epoch 1/5:  25%|██▍       | 312/1250 [00:20<00:59, 15.65it/s, loss=2.48e+3, mean_absolute_error=4.41][A
Epoch 1/5:  25%|██▍       | 312/1250 [00:20<00:59, 15.65it/s, loss=2.48e+3, mean_absolute_error=4.41][A
Epoch 1/5:  25%|██▌       | 314/1250 [00:20<00:59, 15.72it/s, loss=2.48e+3, mean_absolute_error=4.41][A
Epoch 1/5:  25%|██▌       | 314/1250 [00:20<00:59, 15.72it/s, loss=2.48e+3, mean_absolute_error=4.41][A
Epoch 1/5:  25%|██▌       | 314/1250 [00:20<00:59, 15.72it/s, loss=2.48e+3, mean_absolute_error=4.41][A
Epoch 1/5:  25%|██▌       | 316/1250 [00:20<00:59, 15.80it/s, loss=2.48e+3, mean_absolute_error=4.41][A
Epoch 1/5:  25%|██▌       | 316/1250 [00:20<00:59, 15.80it/s, loss=2.48e+3, mean_absolute_error=4.41][A
Epoch 1/5:  25%|██▌       | 316/1250 [00:20<00:59, 15.80it/s, loss=2.48e+3, mean_absolute_error=4.41][A
Epoch 1/5:  25%|██▌       | 318/1250 [00:20<00:59, 15.70it/s, loss=2.48e+3, mean_absolute_error=4.41][A
Epoch 1/5:  25%|██▌       | 318/1250 [00:20<00:59, 15.7

Epoch 1/5:  33%|███▎      | 416/1250 [00:27<00:52, 16.00it/s, loss=2.33e+3, mean_absolute_error=4.41][A
Epoch 1/5:  33%|███▎      | 416/1250 [00:27<00:52, 16.00it/s, loss=2.33e+3, mean_absolute_error=4.41][A
Epoch 1/5:  33%|███▎      | 418/1250 [00:27<00:51, 16.05it/s, loss=2.33e+3, mean_absolute_error=4.41][A
Epoch 1/5:  33%|███▎      | 418/1250 [00:27<00:51, 16.05it/s, loss=2.33e+3, mean_absolute_error=4.41][A
Epoch 1/5:  33%|███▎      | 418/1250 [00:27<00:51, 16.05it/s, loss=2.33e+3, mean_absolute_error=4.41][A
Epoch 1/5:  34%|███▎      | 420/1250 [00:27<00:51, 16.03it/s, loss=2.33e+3, mean_absolute_error=4.41][A
Epoch 1/5:  34%|███▎      | 420/1250 [00:27<00:51, 16.03it/s, loss=2.32e+3, mean_absolute_error=4.41][A
Epoch 1/5:  34%|███▎      | 420/1250 [00:27<00:51, 16.03it/s, loss=2.32e+3, mean_absolute_error=4.41][A
Epoch 1/5:  34%|███▍      | 422/1250 [00:27<00:51, 16.07it/s, loss=2.32e+3, mean_absolute_error=4.41][A
Epoch 1/5:  34%|███▍      | 422/1250 [00:27<00:51, 16.0

Epoch 1/5:  42%|████▏     | 520/1250 [00:33<00:46, 15.84it/s, loss=2.19e+3, mean_absolute_error=4.42][A
Epoch 1/5:  42%|████▏     | 520/1250 [00:33<00:46, 15.84it/s, loss=2.19e+3, mean_absolute_error=4.42][A
Epoch 1/5:  42%|████▏     | 522/1250 [00:33<00:45, 15.83it/s, loss=2.19e+3, mean_absolute_error=4.42][A
Epoch 1/5:  42%|████▏     | 522/1250 [00:33<00:45, 15.83it/s, loss=2.19e+3, mean_absolute_error=4.42][A
Epoch 1/5:  42%|████▏     | 522/1250 [00:33<00:45, 15.83it/s, loss=2189.5, mean_absolute_error=4.42] [A
Epoch 1/5:  42%|████▏     | 524/1250 [00:33<00:45, 15.85it/s, loss=2189.5, mean_absolute_error=4.42][A
Epoch 1/5:  42%|████▏     | 524/1250 [00:33<00:45, 15.85it/s, loss=2.19e+3, mean_absolute_error=4.42][A
Epoch 1/5:  42%|████▏     | 524/1250 [00:33<00:45, 15.85it/s, loss=2.19e+3, mean_absolute_error=4.42][A
Epoch 1/5:  42%|████▏     | 526/1250 [00:33<00:46, 15.70it/s, loss=2.19e+3, mean_absolute_error=4.42][A
Epoch 1/5:  42%|████▏     | 526/1250 [00:34<00:46, 15.70

Epoch 1/5:  50%|████▉     | 624/1250 [00:40<00:41, 15.14it/s, loss=2.07e+3, mean_absolute_error=4.41][A
Epoch 1/5:  50%|████▉     | 624/1250 [00:40<00:41, 15.14it/s, loss=2.07e+3, mean_absolute_error=4.41][A
Epoch 1/5:  50%|█████     | 626/1250 [00:40<00:41, 15.04it/s, loss=2.07e+3, mean_absolute_error=4.41][A
Epoch 1/5:  50%|█████     | 626/1250 [00:40<00:41, 15.04it/s, loss=2.07e+3, mean_absolute_error=4.41][A
Epoch 1/5:  50%|█████     | 626/1250 [00:40<00:41, 15.04it/s, loss=2.07e+3, mean_absolute_error=4.41][A
Epoch 1/5:  50%|█████     | 628/1250 [00:40<00:41, 14.91it/s, loss=2.07e+3, mean_absolute_error=4.41][A
Epoch 1/5:  50%|█████     | 628/1250 [00:40<00:41, 14.91it/s, loss=2.07e+3, mean_absolute_error=4.41][A
Epoch 1/5:  50%|█████     | 628/1250 [00:40<00:41, 14.91it/s, loss=2.07e+3, mean_absolute_error=4.41][A
Epoch 1/5:  50%|█████     | 630/1250 [00:40<00:41, 15.08it/s, loss=2.07e+3, mean_absolute_error=4.41][A
Epoch 1/5:  50%|█████     | 630/1250 [00:40<00:41, 15.0

Epoch 1/5:  58%|█████▊    | 728/1250 [00:47<00:34, 15.08it/s, loss=1.96e+3, mean_absolute_error=4.41][A
Epoch 1/5:  58%|█████▊    | 728/1250 [00:47<00:34, 15.08it/s, loss=1.96e+3, mean_absolute_error=4.41][A
Epoch 1/5:  58%|█████▊    | 730/1250 [00:47<00:34, 15.14it/s, loss=1.96e+3, mean_absolute_error=4.41][A
Epoch 1/5:  58%|█████▊    | 730/1250 [00:47<00:34, 15.14it/s, loss=1.96e+3, mean_absolute_error=4.41][A
Epoch 1/5:  58%|█████▊    | 730/1250 [00:47<00:34, 15.14it/s, loss=1.96e+3, mean_absolute_error=4.41][A
Epoch 1/5:  59%|█████▊    | 732/1250 [00:47<00:34, 15.23it/s, loss=1.96e+3, mean_absolute_error=4.41][A
Epoch 1/5:  59%|█████▊    | 732/1250 [00:47<00:34, 15.23it/s, loss=1.96e+3, mean_absolute_error=4.41][A
Epoch 1/5:  59%|█████▊    | 732/1250 [00:47<00:34, 15.23it/s, loss=1.96e+3, mean_absolute_error=4.41][A
Epoch 1/5:  59%|█████▊    | 734/1250 [00:47<00:33, 15.36it/s, loss=1.96e+3, mean_absolute_error=4.41][A
Epoch 1/5:  59%|█████▊    | 734/1250 [00:47<00:33, 15.3

Epoch 1/5:  67%|██████▋   | 832/1250 [00:54<00:26, 15.70it/s, loss=1.87e+3, mean_absolute_error=4.42][A
Epoch 1/5:  67%|██████▋   | 832/1250 [00:54<00:26, 15.70it/s, loss=1.86e+3, mean_absolute_error=4.42][A
Epoch 1/5:  67%|██████▋   | 834/1250 [00:54<00:26, 15.68it/s, loss=1.86e+3, mean_absolute_error=4.42][A
Epoch 1/5:  67%|██████▋   | 834/1250 [00:54<00:26, 15.68it/s, loss=1.86e+3, mean_absolute_error=4.42][A
Epoch 1/5:  67%|██████▋   | 834/1250 [00:54<00:26, 15.68it/s, loss=1.86e+3, mean_absolute_error=4.42][A
Epoch 1/5:  67%|██████▋   | 836/1250 [00:54<00:26, 15.85it/s, loss=1.86e+3, mean_absolute_error=4.42][A
Epoch 1/5:  67%|██████▋   | 836/1250 [00:54<00:26, 15.85it/s, loss=1.86e+3, mean_absolute_error=4.42][A
Epoch 1/5:  67%|██████▋   | 836/1250 [00:54<00:26, 15.85it/s, loss=1.86e+3, mean_absolute_error=4.42][A
Epoch 1/5:  67%|██████▋   | 838/1250 [00:54<00:25, 15.91it/s, loss=1.86e+3, mean_absolute_error=4.42][A
Epoch 1/5:  67%|██████▋   | 838/1250 [00:54<00:25, 15.9

Epoch 1/5:  75%|███████▍  | 936/1250 [01:00<00:20, 15.30it/s, loss=1.78e+3, mean_absolute_error=4.42][A
Epoch 1/5:  75%|███████▍  | 936/1250 [01:00<00:20, 15.30it/s, loss=1.78e+3, mean_absolute_error=4.42][A
Epoch 1/5:  75%|███████▌  | 938/1250 [01:00<00:20, 15.36it/s, loss=1.78e+3, mean_absolute_error=4.42][A
Epoch 1/5:  75%|███████▌  | 938/1250 [01:00<00:20, 15.36it/s, loss=1.77e+3, mean_absolute_error=4.42][A
Epoch 1/5:  75%|███████▌  | 938/1250 [01:00<00:20, 15.36it/s, loss=1.77e+3, mean_absolute_error=4.42][A
Epoch 1/5:  75%|███████▌  | 940/1250 [01:00<00:20, 15.49it/s, loss=1.77e+3, mean_absolute_error=4.42][A
Epoch 1/5:  75%|███████▌  | 940/1250 [01:00<00:20, 15.49it/s, loss=1.77e+3, mean_absolute_error=4.42][A
Epoch 1/5:  75%|███████▌  | 940/1250 [01:01<00:20, 15.49it/s, loss=1.77e+3, mean_absolute_error=4.42][A
Epoch 1/5:  75%|███████▌  | 942/1250 [01:01<00:19, 15.63it/s, loss=1.77e+3, mean_absolute_error=4.42][A
Epoch 1/5:  75%|███████▌  | 942/1250 [01:01<00:19, 15.6

Epoch 1/5:  83%|████████▎ | 1040/1250 [01:07<00:13, 15.74it/s, loss=1.7e+3, mean_absolute_error=4.42][A
Epoch 1/5:  83%|████████▎ | 1040/1250 [01:07<00:13, 15.74it/s, loss=1.7e+3, mean_absolute_error=4.42][A
Epoch 1/5:  83%|████████▎ | 1040/1250 [01:07<00:13, 15.74it/s, loss=1.7e+3, mean_absolute_error=4.42][A
Epoch 1/5:  83%|████████▎ | 1042/1250 [01:07<00:13, 15.82it/s, loss=1.7e+3, mean_absolute_error=4.42][A
Epoch 1/5:  83%|████████▎ | 1042/1250 [01:07<00:13, 15.82it/s, loss=1.69e+3, mean_absolute_error=4.42][A
Epoch 1/5:  83%|████████▎ | 1042/1250 [01:07<00:13, 15.82it/s, loss=1.69e+3, mean_absolute_error=4.42][A
Epoch 1/5:  84%|████████▎ | 1044/1250 [01:07<00:13, 15.65it/s, loss=1.69e+3, mean_absolute_error=4.42][A
Epoch 1/5:  84%|████████▎ | 1044/1250 [01:07<00:13, 15.65it/s, loss=1.69e+3, mean_absolute_error=4.42][A
Epoch 1/5:  84%|████████▎ | 1044/1250 [01:07<00:13, 15.65it/s, loss=1.69e+3, mean_absolute_error=4.42][A
Epoch 1/5:  84%|████████▎ | 1046/1250 [01:07<00:12

Epoch 1/5:  91%|█████████▏| 1142/1250 [01:13<00:06, 16.14it/s, loss=1.62e+3, mean_absolute_error=4.42][A
Epoch 1/5:  91%|█████████▏| 1142/1250 [01:13<00:06, 16.14it/s, loss=1.62e+3, mean_absolute_error=4.42][A
Epoch 1/5:  92%|█████████▏| 1144/1250 [01:13<00:06, 16.12it/s, loss=1.62e+3, mean_absolute_error=4.42][A
Epoch 1/5:  92%|█████████▏| 1144/1250 [01:13<00:06, 16.12it/s, loss=1.62e+3, mean_absolute_error=4.42][A
Epoch 1/5:  92%|█████████▏| 1144/1250 [01:13<00:06, 16.12it/s, loss=1.62e+3, mean_absolute_error=4.42][A
Epoch 1/5:  92%|█████████▏| 1146/1250 [01:13<00:06, 16.16it/s, loss=1.62e+3, mean_absolute_error=4.42][A
Epoch 1/5:  92%|█████████▏| 1146/1250 [01:13<00:06, 16.16it/s, loss=1.62e+3, mean_absolute_error=4.42][A
Epoch 1/5:  92%|█████████▏| 1146/1250 [01:14<00:06, 16.16it/s, loss=1.62e+3, mean_absolute_error=4.42][A
Epoch 1/5:  92%|█████████▏| 1148/1250 [01:14<00:06, 16.17it/s, loss=1.62e+3, mean_absolute_error=4.42][A
Epoch 1/5:  92%|█████████▏| 1148/1250 [01:14<0

Epoch 1/5: 100%|█████████▉| 1244/1250 [01:20<00:00, 15.79it/s, loss=1.56e+3, mean_absolute_error=4.42][A
Epoch 1/5: 100%|█████████▉| 1246/1250 [01:20<00:00, 15.79it/s, loss=1.56e+3, mean_absolute_error=4.42][A
Epoch 1/5: 100%|█████████▉| 1246/1250 [01:20<00:00, 15.79it/s, loss=1.56e+3, mean_absolute_error=4.42][A
Epoch 1/5: 100%|█████████▉| 1246/1250 [01:20<00:00, 15.79it/s, loss=1.56e+3, mean_absolute_error=4.42][A
Epoch 1/5: 100%|█████████▉| 1248/1250 [01:20<00:00, 15.86it/s, loss=1.56e+3, mean_absolute_error=4.42][A
Epoch 1/5: 100%|█████████▉| 1248/1250 [01:20<00:00, 15.86it/s, loss=1.55e+3, mean_absolute_error=4.42][A
Epoch 1/5: 100%|█████████▉| 1248/1250 [01:20<00:00, 15.86it/s, loss=1.55e+3, mean_absolute_error=4.42][A
Epoch 1/5: 100%|██████████| 1250/1250 [01:20<00:00, 15.54it/s, loss=1.55e+3, mean_absolute_error=4.42][A
All epochs:   0%|          | 0/5 [01:20<?, ?it/s]


NameError: name 'keras' is not defined