In [1]:
import numpy as np
import tensorflow as tf
from keras.layers import Input, UpSampling2D
from keras.models import Model
from tensorflow.keras.datasets import cifar100
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Flatten, Conv2D, AveragePooling2D, GlobalAveragePooling2D, BatchNormalization, Activation, Dropout, Concatenate, Reshape
from tensorflow.keras.optimizers import SGD, Adam

In [2]:
(x_train, y_train), (x_test, y_test) = cifar100.load_data()
num_classes = 100

Downloading data from https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz
[1m169001437/169001437[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 0us/step


In [3]:
x_train = x_train.astype('float32') / 255.0
x_test = x_test.astype('float32') / 255.0

In [4]:
y_train = tf.keras.utils.to_categorical(y_train, num_classes)
y_test = tf.keras.utils.to_categorical(y_test, num_classes)

In [5]:
input_dim = 20
latent_dim = 8

In [6]:
def generate_block_vectors(lmin, lmax, gmin, gmax, n):
    block_vectors = []
    for _ in range(n):
        n_layers = np.random.randint(lmin, lmax + 1)
        block_vector = [np.random.randint(gmin, gmax + 1) for _ in range(n_layers)]
        block_vector += [0] * (lmax - n_layers)  # Pad with zeros
        block_vectors.append(block_vector)
    return np.array(block_vectors)

In [7]:
lmin, lmax = 10, 20
gmin, gmax = 10, 32
n_samples =  15000
batch_size = 64

In [8]:
# Normalize block vectors
def normalize_block_vectors(block_vectors, gmin, gmax):
    return (block_vectors - gmin) / (gmax - gmin)

# Denormalize block vectors
def denormalize_block_vectors(block_vectors, gmin, gmax):
    return np.round(block_vectors * (gmax - gmin) + gmin)


In [9]:
block_vectors = generate_block_vectors(lmin, lmax, gmin, gmax, n_samples)

In [10]:
block_vectors[0]

array([10, 24, 23, 25, 32, 11, 28, 23, 20, 29, 13, 30, 25, 16, 17, 25, 29,
       32, 19, 11])

In [11]:
def create_pairwise_datasets(block_vectors, batch_size):
    n_samples = block_vectors.shape[0]
    indices1 = np.random.choice(n_samples, batch_size, replace=False)
    indices2 = np.random.choice(n_samples, batch_size, replace=False)
    input1 = block_vectors[indices1]
    input2 = block_vectors[indices2]
    return input1, input2

In [12]:
class LatentNormalizationLayer(tf.keras.layers.Layer):
    def __init__(self, epsilon=1e-8, scale_range=(0, 1), **kwargs):
        super(LatentNormalizationLayer, self).__init__(**kwargs)
        self.epsilon = epsilon
        self.min_val, self.max_val = scale_range

    def call(self, inputs):
        # Normalize to have zero mean and unit variance
        mean = tf.reduce_mean(inputs, axis=-1, keepdims=True)
        std_dev = tf.math.reduce_std(inputs, axis=-1, keepdims=True) + self.epsilon
        normalized = (inputs - mean) / std_dev
        
        # Scale to the specified range (e.g., [0, 1])
        scaled = 0.5 * (normalized + 1)  # Scale to range [0, 1]
        scaled = tf.clip_by_value(scaled, self.min_val, self.max_val)  # Clip to avoid out-of-range values
        return scaled


In [13]:
def create_encoder(lmax, latent_dim):
    inputs = tf.keras.Input(shape=(lmax,))
    x = tf.keras.layers.Dense(256, activation="relu")(inputs)
    x = tf.keras.layers.Dense(128, activation="relu")(x)
    outputs = tf.keras.layers.Dense(latent_dim, activation="sigmoid")(x)  # Values in [0, 1]
    encoder = tf.keras.Model(inputs, outputs, name="encoder")
    return encoder

In [14]:
def create_decoder(latent_dim, lmax, gmin, gmax):
    inputs = tf.keras.Input(shape=(latent_dim,))
    x = tf.keras.layers.Dense(128, activation="relu")(inputs)
    x = tf.keras.layers.Dense(256, activation="relu")(x)
    x = tf.keras.layers.Dense(lmax, activation="sigmoid")(x)  # Output normalized to [0, 1]
    
    # Denormalize and round to integers
    outputs = tf.keras.layers.Lambda(lambda x: tf.round(x * (gmax - gmin) + gmin))(x)
    
    decoder = tf.keras.Model(inputs, outputs, name="decoder")
    return decoder

In [15]:
encoder = create_encoder(lmax, latent_dim)
decoder = create_decoder(latent_dim, lmax, gmin, gmax)

inputs = tf.keras.Input(shape=(lmax,))
encoded = encoder(inputs)
decoded = decoder(encoded)
autoencoder = tf.keras.Model(inputs, decoded, name="autoencoder")

In [16]:
def reconstruction_loss(input_vec, reconstructed_vec):
    return tf.reduce_mean(tf.square(input_vec - reconstructed_vec))

In [17]:
def architecture_similarity_loss(input1, input2, latent1, latent2, gmin, gmax, lmax, latent_dim):
    # Normalize input block vectors to [0, 1]
    normalized_input1 = (input1 - gmin) / (gmax - gmin)
    normalized_input2 = (input2 - gmin) / (gmax - gmin)

    # Architecture distance for block vectors
    d_arch_blocks = tf.reduce_mean(tf.abs(normalized_input1 - normalized_input2), axis=1) / lmax

    # Architecture distance for latent vectors
    d_arch_latents = tf.reduce_mean(tf.abs(latent1 - latent2), axis=1) / latent_dim

    # Architecture similarity loss
    # Cast d_arch_blocks to float32
    return tf.reduce_mean(tf.square(tf.cast(d_arch_blocks, tf.float32) - d_arch_latents))

In [18]:
def scale_similarity_loss(input1, input2, latent1, latent2, gmin, gmax, lmax, latent_dim):
    # Normalize input block vectors to [0, 1]
    normalized_input1 = (input1 - gmin) / (gmax - gmin)
    normalized_input2 = (input2 - gmin) / (gmax - gmin)

    # Scale distance for block vectors
    d_scale_blocks = tf.abs(tf.reduce_sum(normalized_input1, axis=1) - tf.reduce_sum(normalized_input2, axis=1))

    # Scale distance for latent vectors
    d_scale_latents = tf.abs(tf.reduce_sum(latent1, axis=1) - tf.reduce_sum(latent2, axis=1))

    # Scale similarity loss
    return tf.reduce_mean(tf.square(tf.cast(d_scale_blocks, tf.float32) - d_scale_latents))

In [19]:
sample_block_vector = np.random.randint(gmin, gmax + 1, size=(1, lmax))
latent_vector = encoder(sample_block_vector)
reconstructed_block_vector = decoder(latent_vector)

print(tf.reduce_mean(tf.square(sample_block_vector - reconstructed_block_vector)))

print("Original Block Vector:", sample_block_vector)
print("Reconstructed Block Vector:", reconstructed_block_vector.numpy().astype(int))

tf.Tensor(40.75, shape=(), dtype=float32)
Original Block Vector: [[11 13 22 29 23 17 20 10 24 22 27 17 31 15 16 22 10 27 18 15]]
Reconstructed Block Vector: [[21 21 22 20 22 22 22 22 21 21 21 21 22 21 21 21 21 22 21 21]]


In [20]:
optimizer = Adam(learning_rate=0.005)

In [21]:
print(n_samples//batch_size)

234


In [22]:
epochs=10

In [23]:
# Training loop
for epoch in range(10):  # Number of epochs
    print(f"Epoch {epoch + 1}")
    total_loss = 0
    total_batches = n_samples // batch_size

    for _ in range(total_batches):
        # Create pairwise datasets
        input1, input2 = create_pairwise_datasets(block_vectors, batch_size)

        # Perform a training step
        with tf.GradientTape() as tape:
            # Pass the inputs through the autoencoder
            latent1 = encoder(input1, training=True)
            latent2 = encoder(input2, training=True)
            recon1 = decoder(latent1, training=True)
            recon2 = decoder(latent2, training=True)

            # Calculate custom losses
            recon_loss_1 = reconstruction_loss(input1, recon1)
            recon_loss_2 = reconstruction_loss(input2, recon2)
            arch_sim_loss = architecture_similarity_loss(input1, input2, latent1, latent2, gmin, gmax, lmax, latent_dim)
            scale_sim_loss = scale_similarity_loss(input1, input2, latent1, latent2, gmin, gmax, lmax, latent_dim)

            # Total loss
            total_batch_loss = recon_loss_1 + recon_loss_2 + arch_sim_loss + scale_sim_loss

        # Compute gradients and apply them
        gradients = tape.gradient(total_batch_loss, autoencoder.trainable_variables)
        optimizer.apply_gradients(zip(gradients, autoencoder.trainable_variables))

        # Accumulate total loss for monitoring
        total_loss += total_batch_loss.numpy()

    # Average loss for the epoch
    avg_loss = total_loss / total_batches
    reconstructed = autoencoder.predict(block_vectors)
    mse = np.mean(np.square(block_vectors - reconstructed))
    print(f"Epoch {epoch + 1} completed. Average Loss: {avg_loss:.4f} , Reconstruction MSE: {mse:.4f}\n")

print("Training completed.")

Epoch 1




[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step
Epoch 1 completed. Average Loss: 296.9353 , Reconstruction MSE: 145.6962

Epoch 2
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
Epoch 2 completed. Average Loss: 294.5817 , Reconstruction MSE: 145.4779

Epoch 3
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
Epoch 3 completed. Average Loss: 295.9600 , Reconstruction MSE: 146.0742

Epoch 4
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
Epoch 4 completed. Average Loss: 295.9015 , Reconstruction MSE: 145.9635

Epoch 5
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
Epoch 5 completed. Average Loss: 296.2026 , Reconstruction MSE: 146.0551

Epoch 6
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
Epoch 6 completed. Average Loss: 296.0899 , Reconstruction MSE: 145.9487

Epoch 7
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1

In [24]:
sample_block_vector = np.random.randint(gmin, gmax + 1, size=(1, lmax))
latent_vector = encoder(sample_block_vector)
reconstructed_block_vector = decoder(latent_vector)

print(tf.reduce_mean(tf.square(sample_block_vector - reconstructed_block_vector)))

print("Original Block Vector:", sample_block_vector)
print("Reconstructed Block Vector:", reconstructed_block_vector.numpy().astype(int))

tf.Tensor(38.15, shape=(), dtype=float32)
Original Block Vector: [[28 25 14 27 14 18 15 15 12 27 22 15 27 17 23 29 25 29 14 15]]
Reconstructed Block Vector: [[22 21 22 20 22 22 22 22 21 21 21 20 22 21 20 21 21 22 21 22]]


In [25]:
input1, input2 = create_pairwise_datasets(block_vectors, batch_size)

In [26]:
input1[0]

array([10, 14, 28, 21, 12, 20, 10, 12, 27, 12, 19, 30, 26,  0,  0,  0,  0,
        0,  0,  0])

In [27]:
input2[0]

array([22, 29, 30, 28, 23, 31, 24, 20, 16, 10, 30,  0,  0,  0,  0,  0,  0,
        0,  0,  0])

In [28]:
latent1 = encoder(input1)
latent1[0]

<tf.Tensor: shape=(8,), dtype=float32, numpy=array([1., 0., 0., 1., 0., 0., 0., 0.], dtype=float32)>

In [29]:
print(f"Shape of x_train: {x_train.shape}")

Shape of x_train: (50000, 32, 32, 3)


In [30]:
print(block_vectors.shape)

(15000, 20)


In [31]:
print(autoencoder.output_shape)

(None, 20)


In [32]:
# import matplotlib.pyplot as plt
# import numpy as np

# def plot_relationships(autoencoder, encoder, block_vectors, gmin, gmax):
#     # Normalize block vectors to [0, 1] for comparison
#     normalized_block_vectors = (block_vectors - gmin) / (gmax - gmin)

#     # Encode block vectors to latent space
#     latent_vectors = encoder.predict(normalized_block_vectors)

#     # Compute L1 distances in the latent space
#     n = len(block_vectors)
#     latent_distances = []
#     block_distances = []

#     for i in range(n):
#         for j in range(i + 1, n):
#             block_distance = np.sum(np.abs(normalized_block_vectors[i] - normalized_block_vectors[j]))
#             latent_distance = np.sum(np.abs(latent_vectors[i] - latent_vectors[j]))
#             block_distances.append(block_distance)
#             latent_distances.append(latent_distance)

#     # Plot relationship between L1 distances
#     plt.figure(figsize=(8, 6))
#     plt.scatter(block_distances, latent_distances, alpha=0.6)
#     plt.xlabel("L1 Distance (Normalized Block Vectors)")
#     plt.ylabel("L1 Distance (Latent Space)")
#     plt.title("Relationship Between Block Vector and Latent Space Distances")
#     plt.grid(True)
#     plt.show()

#     # Sum of values in block vectors and latent vectors
#     block_sums = np.sum(block_vectors, axis=1)
#     latent_sums = np.sum(latent_vectors, axis=1)

#     # Plot relationship between sum of values
#     plt.figure(figsize=(8, 6))
#     plt.scatter(block_sums, latent_sums, alpha=0.6)
#     plt.xlabel("Sum of Block Vector Values")
#     plt.ylabel("Sum of Latent Vector Values")
#     plt.title("Relationship Between Block Vector and Latent Vector Sums")
#     plt.grid(True)
#     plt.show()

# # Example usage
# plot_relationships(autoencoder, encoder, block_vectors, gmin, gmax)

In [33]:
def build_dense_block(block_vector):
    block_vector = np.round(block_vector).astype(int)  # Ensure integers
    inputs = Input(shape=(32, 32, 3))
    x = inputs

    for growth_rate in block_vector:
        if growth_rate == 0:
            continue

        # Convolution-BatchNorm-ReLU sequence
        conv = Conv2D(growth_rate, kernel_size=(3, 3), strides=1, padding='same')
        bn = BatchNormalization()
        act = Activation('relu')

        # Apply sequentially
        layer = conv(x)
        layer = bn(layer)
        layer = act(layer)

        # Concatenate to the block
        x = Concatenate()([x, layer])

    x = Conv2D(filters=3, kernel_size=1, padding='same', activation='relu')(x)
    
    return Model(inputs, x)

In [34]:
def initialize_particles(encoder, n_particles, block_vectors):
    particles = []
    for _ in range(n_particles):
        idx = np.random.randint(0, len(block_vectors))
        block_vector = block_vectors[idx]
        latent_vector = encoder.predict(block_vector.reshape(1, -1))
        particles.append(latent_vector.flatten())
    return np.array(particles)

In [35]:
n_particles = 3
particles = initialize_particles(encoder, n_particles, block_vectors)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 175ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step


In [36]:
def basic_fitness_evaluation(particles, decoder, x_train, y_train, x_test, y_test, rb, kbmax, lbt):
    db_train_size = int(rb * len(x_train))
    db_train = x_train[:db_train_size]
    db_train_labels = y_train[:db_train_size]

    fitness_scores = []
    for particle in particles:
        block_vector = decoder.predict(particle.reshape(1, -1)).flatten()
        dense_block = build_dense_block(block_vector)

        model = tf.keras.Sequential([
            dense_block,
            AveragePooling2D(pool_size=(2, 2)),
            Flatten(),
            Dense(num_classes, activation='softmax')
        ])

        model.compile(optimizer=SGD(learning_rate=0.01, momentum=0.9), loss='categorical_crossentropy', metrics=['accuracy'])

        epoch, loss = 0, float('inf')
        while epoch < kbmax and loss >= lbt:
            history = model.fit(db_train, db_train_labels, epochs=1, verbose=0)
            loss = history.history['loss'][-1]
            epoch += 1

        accuracy = model.evaluate(x_test, y_test, verbose=0)[1]  # Fetch accuracy from the evaluation result
        fitness_scores.append(accuracy)

    return np.array(fitness_scores)

In [37]:
def progressive_fitness_evaluation(particles, fitness_scores, decoder, x_train, y_train, x_test, y_test, rprog, kpmax, lpt):
    progressive_scores = np.zeros(len(particles))
    top_indices = np.argsort(fitness_scores)[-len(fitness_scores) // 3:]

    for idx in top_indices:
        particle = particles[idx]
        block_vector = decoder.predict(particle.reshape(1, -1)).flatten()
        dense_block = build_dense_block(block_vector)

        for proportion in [0.2, 0.4]:
            dp_train_size = int(proportion * len(x_train))
            dp_train = x_train[:dp_train_size]
            dp_train_labels = y_train[:dp_train_size]

            model = tf.keras.Sequential([
                dense_block,
                AveragePooling2D(pool_size=(2, 2)),
                Flatten(),
                Dense(num_classes, activation='softmax')
            ])

            model.compile(optimizer=SGD(learning_rate=0.01, momentum=0.9), loss='categorical_crossentropy',metrics=['accuracy'])

            epoch, loss = 0, float('inf')
            while epoch < kpmax and loss >= lpt:
                history = model.fit(dp_train, dp_train_labels, epochs=1, verbose=0)
                loss = history.history['loss'][-1]
                epoch += 1

            accuracy = model.evaluate(x_test, y_test, verbose=0)[1]
            progressive_scores[idx] = max(progressive_scores[idx], accuracy)

    return progressive_scores

In [38]:
def pso_algorithm(particles, decoder, x_train, y_train, x_test, y_test, n_iterations, rb, kbmax, lbt, rprog, kpmax, lpt):
    velocities = np.zeros_like(particles)
    personal_best_positions = np.copy(particles)
    personal_best_scores = basic_fitness_evaluation(particles, decoder, x_train, y_train, x_test, y_test, rb, kbmax, lbt)
    global_best_position = personal_best_positions[np.argmax(personal_best_scores)]
    global_best_score = np.max(personal_best_scores)

    w, c1, c2 = 0.7298, 1.49618, 1.49618  # PSO hyperparameters
    velocity_max = 0.05

    for iteration in range(n_iterations):
        for i in range(len(particles)):
            r1, r2 = np.random.rand(), np.random.rand()
            velocities[i] = w * velocities[i] + c1 * r1 * (personal_best_positions[i] - particles[i]) + c2 * r2 * (global_best_position - particles[i])

            # Clamp velocities
            velocities[i] = np.clip(velocities[i], -velocity_max, velocity_max)

            # Update particle positions
            particles[i] += velocities[i]

        fitness_scores = basic_fitness_evaluation(particles, decoder, x_train, y_train, x_test, y_test, rb, kbmax, lbt)
        progressive_scores = progressive_fitness_evaluation(particles, fitness_scores, decoder, x_train, y_train, x_test, y_test, rprog, kpmax, lpt)

        for i in range(len(particles)):
            if fitness_scores[i] > personal_best_scores[i]:
                personal_best_positions[i] = particles[i]
                personal_best_scores[i] = fitness_scores[i]

        global_best_position = personal_best_positions[np.argmax(personal_best_scores)]
        global_best_score = np.max(personal_best_scores)

        # Update the global best score and position
        print(f"Iteration {iteration + 1}/{n_iterations}, Best Fitness: {global_best_score}")

        # Early stopping condition: If the global best score doesn't improve for 5 iterations
        epsilon = 1e-6  # A small tolerance value

        if iteration > 5 and np.abs(np.max(personal_best_scores) - global_best_score) < epsilon:
              break

    return global_best_position

In [41]:
# PSO parameters
n_iterations = 3
rb = 0.1  # Basic fitness training proportion
kbmax = 5  # Max epochs for basic fitness
lbt = 0.5  # Loss threshold for basic fitness

rprog = [0.2, 0.4]  # Progressive training proportions
kpmax = 5  # Max epochs for progressive fitness
lpt = 0.5  # Loss threshold for progressive fitness

# Run PSO to find the best dense block
global_best_particle = pso_algorithm(particles, decoder, x_train, y_train, x_test, y_test, n_iterations, rb, kbmax, lbt, rprog, kpmax, lpt)

# Decode the global best particle into a dense block
final_block_vector = decoder.predict(global_best_particle.reshape(1, -1)).flatten()
final_dense_block = build_dense_block(final_block_vector)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 177ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
Iteration 1/3, Best Fitness: 0.05829999968409538
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
Iteration 2/3, Best Fitness: 0.05829999968409538
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━

In [42]:
print("Output shape of final_dense_block:", final_dense_block.output_shape)

Output shape of final_dense_block: (None, 32, 32, 3)


In [43]:
def stack_dense_blocks(block, num_blocks, input_shape):
    inputs = Input(shape=input_shape)
    x = inputs
    for _ in range(num_blocks):
        x = block(x)
    x = GlobalAveragePooling2D()(x)  # Pool the feature map to a 1D vector
    outputs = Dense(100, activation='softmax')(x)  # Match 100 classes
    return Model(inputs, outputs)

In [44]:
smax = 2
input_shape = (32, 32, 3)

In [45]:
best_model = None
best_accuracy = 0

for num_blocks in range(1, smax + 1):
    model = stack_dense_blocks(final_dense_block, num_blocks, input_shape)
    model.compile(optimizer=SGD(learning_rate=0.01, momentum=0.9), 
                  loss='categorical_crossentropy', 
                  metrics=['accuracy'])
    
    model.fit(x_train, y_train, epochs=5, batch_size=64, verbose=1, validation_split=0.2)
    _, accuracy = model.evaluate(x_test, y_test, verbose=0)
    
    if accuracy > best_accuracy:
        best_accuracy = accuracy
        best_model = model

print(f"Best accuracy achieved: {best_accuracy}")

Epoch 1/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m140s[0m 166ms/step - accuracy: 0.0203 - loss: 4.5193 - val_accuracy: 0.0204 - val_loss: 6.1324
Epoch 2/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m99s[0m 159ms/step - accuracy: 0.0467 - loss: 4.1824 - val_accuracy: 0.0202 - val_loss: 6.0363
Epoch 3/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m100s[0m 160ms/step - accuracy: 0.0559 - loss: 4.0498 - val_accuracy: 0.0437 - val_loss: 4.2432
Epoch 4/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m100s[0m 159ms/step - accuracy: 0.0653 - loss: 3.9816 - val_accuracy: 0.0558 - val_loss: 4.0544
Epoch 5/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m99s[0m 159ms/step - accuracy: 0.0723 - loss: 3.9234 - val_accuracy: 0.0675 - val_loss: 4.0018
Epoch 1/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m225s[0m 328ms/step - accuracy: 0.0263 - loss: 4.4188 - val_accuracy: 0.0087 - val_loss: 4.6172
Epoch 2/5
[