# **Training Deep Neural Networks**

## The Vanishing/Exploding Gradients Problem

In [8]:
import tensorflow as tf
from tensorflow import keras
import numpy as np
import matplotlib.pyplot as plt
from functools import partial

# Seed both TensorFlow's and NumPy's global random generators with the same
# fixed value (42) for reproducibility.
tf.random.set_seed(42)
np.random.seed(42)

# ============================================================================
# 1. HANDLING VANISHING/EXPLODING GRADIENT PROBLEM
# ============================================================================

def create_model_with_proper_initialization(input_shape, num_classes):
    """
    Build a batch-normalized ELU classifier with He-initialized hidden layers.

    Layout: Flatten -> BatchNormalization -> two hidden stages of
    (bias-free Dense -> BatchNormalization -> ELU) with 300 and 100 units ->
    softmax output layer with Glorot-uniform initialization.

    Args:
        input_shape: Shape of one input sample, forwarded to ``Flatten``.
        num_classes: Number of units in the softmax output layer.

    Returns:
        An uncompiled ``keras.models.Sequential`` model.
    """
    # Flatten the input and normalize it before the first hidden stage.
    stack = [
        keras.layers.Flatten(input_shape=input_shape),
        keras.layers.BatchNormalization(),
    ]

    for units in (300, 100):
        # Each hidden Dense layer has no bias term and is immediately
        # followed by BatchNormalization, then the ELU activation.
        stack.append(
            keras.layers.Dense(units, kernel_initializer="he_normal", use_bias=False)
        )
        stack.append(keras.layers.BatchNormalization())
        stack.append(keras.layers.Activation("elu"))

    # Softmax output layer uses Glorot-uniform initialization.
    stack.append(
        keras.layers.Dense(
            num_classes,
            activation="softmax",
            kernel_initializer="glorot_uniform",
        )
    )
    return keras.models.Sequential(stack)

# Alternative: Using SELU activation with LeCun initialization
def create_selu_model(input_shape, num_classes):
    """
    Build a plain dense classifier that uses SELU hidden activations.

    Both hidden layers (300 and 100 units) pair the SELU activation with
    LeCun-normal weight initialization; the output layer is a softmax.

    Args:
        input_shape: Shape of one input sample, forwarded to ``Flatten``.
        num_classes: Number of units in the softmax output layer.

    Returns:
        An uncompiled ``keras.models.Sequential`` model.
    """
    stack = [keras.layers.Flatten(input_shape=input_shape)]
    stack.extend(
        keras.layers.Dense(units, activation="selu", kernel_initializer="lecun_normal")
        for units in (300, 100)
    )
    stack.append(keras.layers.Dense(num_classes, activation="softmax"))
    return keras.models.Sequential(stack)

# Custom variance scaling initializer
def create_custom_initializer():
    """
    Return a He-style variance-scaling initializer based on the average fan.

    The initializer is a ``VarianceScaling`` instance configured with
    ``scale=2.0``, ``mode='fan_avg'`` and a uniform distribution.
    """
    settings = {
        "scale": 2.0,
        "mode": 'fan_avg',
        "distribution": 'uniform',
    }
    return keras.initializers.VarianceScaling(**settings)


## Transfer Learning

In [9]:

# ============================================================================
# 2. REUSING PRETRAINED LAYERS (TRANSFER LEARNING)
# ============================================================================

def create_base_model(input_shape, num_classes):
    """
    Build the simple dense network used as the starting point for transfer
    learning.

    Layout: Flatten -> Dense(300, ELU) -> Dense(100, ELU) -> softmax output,
    with He-normal initialization on the hidden layers.

    Args:
        input_shape: Shape of one input sample, forwarded to ``Flatten``.
        num_classes: Number of units in the softmax output layer.

    Returns:
        An uncompiled ``keras.models.Sequential`` model.
    """
    stack = [keras.layers.Flatten(input_shape=input_shape)]
    for units in (300, 100):
        stack.append(
            keras.layers.Dense(units, activation="elu", kernel_initializer="he_normal")
        )
    stack.append(keras.layers.Dense(num_classes, activation="softmax"))
    return keras.models.Sequential(stack)

def setup_transfer_learning(base_model_path, new_num_classes):
    """
    Derive a new-task model from a saved model and keep a copy of the original.

    Args:
        base_model_path: Path of the saved model, passed to
            ``keras.models.load_model``.
        new_num_classes: Number of units in the new softmax output layer.

    Returns:
        A ``(model_B, model_A_clone)`` tuple. ``model_B`` is built from the
        loaded model's own layer objects (all but the last) plus a fresh
        softmax layer. ``model_A_clone`` is a structural clone of the loaded
        model that received a copy of its weights.
    """
    pretrained = keras.models.load_model(base_model_path)

    # Reuse every layer of the loaded model except its output layer, then
    # attach a new output layer for the new task.
    reused_layers = pretrained.layers[:-1]
    model_B = keras.models.Sequential(reused_layers)
    new_head = keras.layers.Dense(new_num_classes, activation="softmax")
    model_B.add(new_head)

    # clone_model() copies the architecture only, so the weights are copied
    # over explicitly to make the clone usable for comparison.
    original_weights_copy = keras.models.clone_model(pretrained)
    original_weights_copy.set_weights(pretrained.get_weights())

    return model_B, original_weights_copy

def fine_tune_model(model, X_train, y_train, X_valid, y_valid):
    """
    Fine-tune ``model`` in two phases: frozen body first, then everything.

    Phase 1 trains for 4 epochs with every layer except the last one frozen,
    using the default "sgd" optimizer. Phase 2 re-enables training on those
    layers and trains for 16 more epochs with SGD at a learning rate of 1e-4.
    The model is recompiled after each change to the ``trainable`` flags.

    Args:
        model: Model whose last layer is the part to train first.
        X_train, y_train: Training inputs and labels.
        X_valid, y_valid: Validation inputs and labels.

    Returns:
        A ``(history_phase1, history_phase2)`` tuple with the results of the
        two ``fit`` calls.
    """
    def set_body_trainable(flag):
        # "Body" = every layer except the final one.
        for body_layer in model.layers[:-1]:
            body_layer.trainable = flag

    shared_fit_args = dict(validation_data=(X_valid, y_valid), verbose=1)

    # ---- Phase 1: only the last layer learns ----
    set_body_trainable(False)
    model.compile(
        loss="sparse_categorical_crossentropy",
        optimizer="sgd",
        metrics=["accuracy"],
    )
    print("Phase 1: Training with frozen layers...")
    history_phase1 = model.fit(X_train, y_train, epochs=4, **shared_fit_args)

    # ---- Phase 2: all layers learn, with a lower learning rate ----
    set_body_trainable(True)
    gentle_sgd = keras.optimizers.SGD(learning_rate=1e-4)
    model.compile(
        loss="sparse_categorical_crossentropy",
        optimizer=gentle_sgd,
        metrics=["accuracy"],
    )
    print("Phase 2: Fine-tuning all layers...")
    history_phase2 = model.fit(X_train, y_train, epochs=16, **shared_fit_args)

    return history_phase1, history_phase2


## Faster Optimizers

In [10]:

# ============================================================================
# 3. FASTER OPTIMIZERS
# ============================================================================

def get_optimizers():
    """
    Build a name -> optimizer mapping with typical hyperparameters.

    Returns:
        A dict with the keys ``'sgd_momentum'``, ``'rmsprop'``, ``'adam'`` and
        ``'sgd_decay'``, each mapped to a freshly created Keras optimizer.
    """
    opt = keras.optimizers
    return dict(
        sgd_momentum=opt.SGD(learning_rate=0.001, momentum=0.9),
        rmsprop=opt.RMSprop(learning_rate=0.001, rho=0.9),
        adam=opt.Adam(learning_rate=0.001, beta_1=0.9, beta_2=0.999),
        sgd_decay=opt.SGD(learning_rate=0.01, weight_decay=1e-4),
    )

# Learning rate scheduling
def exponential_decay_schedule(lr0=0.01, decay_steps=20):
    """
    Create an epoch -> learning-rate function with exponential decay.

    The returned function computes ``lr0 * 0.1 ** (epoch / decay_steps)``,
    i.e. the rate is divided by 10 every ``decay_steps`` epochs.

    Args:
        lr0: Learning rate at epoch 0.
        decay_steps: Number of epochs over which the rate drops tenfold.

    Returns:
        A function taking the epoch index and returning the learning rate.
    """
    def exponential_decay_fn(epoch):
        exponent = epoch / decay_steps
        return lr0 * 0.1**exponent

    return exponential_decay_fn

def piecewise_constant_schedule(epoch):
    """
    Return the learning rate for ``epoch`` from a fixed step table.

    Epochs below 5 use 0.01, epochs below 15 use 0.005, and every later
    epoch uses 0.001.
    """
    # (exclusive upper epoch bound, learning rate), checked in order.
    steps = ((5, 0.01), (15, 0.005))
    for upper_bound, rate in steps:
        if epoch < upper_bound:
            return rate
    return 0.001

def setup_learning_rate_scheduling():
    """
    Create three alternative learning-rate scheduling objects.

    Returns:
        A tuple of:
        1. a ``LearningRateScheduler`` callback driven by the per-epoch
           function from ``exponential_decay_schedule(lr0=0.01, decay_steps=20)``;
        2. a ``ReduceLROnPlateau`` callback (factor 0.5, patience 5,
           minimum rate 1e-6);
        3. a built-in ``ExponentialDecay`` schedule object (initial rate 0.01,
           decay rate 0.1) whose ``decay_steps`` is 20 epochs expressed in
           steps.
    """
    # Option 1: callback that calls a custom epoch -> rate function.
    per_epoch_fn = exponential_decay_schedule(lr0=0.01, decay_steps=20)
    epoch_scheduler = keras.callbacks.LearningRateScheduler(per_epoch_fn)

    # Option 2: callback configured to scale the rate by `factor`.
    plateau_reducer = keras.callbacks.ReduceLROnPlateau(
        factor=0.5, patience=5, min_lr=1e-6
    )

    # Option 3: schedule object handed directly to an optimizer.
    batches_per_epoch = 1875  # example value: 60000 samples / batch size 32
    step_schedule = keras.optimizers.schedules.ExponentialDecay(
        initial_learning_rate=0.01,
        decay_steps=20 * batches_per_epoch,  # 20 epochs' worth of steps
        decay_rate=0.1,
    )

    return epoch_scheduler, plateau_reducer, step_schedule


## Avoiding Overfitting through Regularization

In [11]:

# ============================================================================
# 4. AVOIDING OVERFITTING THROUGH REGULARIZATION
# ============================================================================

# L1 and L2 Regularization
def create_regularized_model(input_shape, num_classes, l2_reg=0.01):
    """
    Build an ELU classifier whose hidden layers carry an L2 weight penalty.

    Layout: Flatten -> Dense(300, ELU) -> Dense(100, ELU) -> softmax output.
    The hidden layers use He-normal initialization and an L2 kernel
    regularizer; the output layer uses Glorot-uniform initialization and no
    regularizer.

    Args:
        input_shape: Shape of one input sample, forwarded to ``Flatten``.
        num_classes: Number of units in the softmax output layer.
        l2_reg: L2 regularization factor applied to the hidden-layer kernels.

    Returns:
        An uncompiled ``keras.models.Sequential`` model.
    """
    # Dense-layer factory with the shared hidden-layer settings pre-filled;
    # individual calls may override any of these keyword arguments.
    RegularizedDense = partial(
        keras.layers.Dense,
        activation="elu",
        kernel_initializer="he_normal",
        kernel_regularizer=keras.regularizers.l2(l2_reg)
    )

    model = keras.models.Sequential([
        keras.layers.Flatten(input_shape=input_shape),
        RegularizedDense(300),
        RegularizedDense(100),
        # The output width follows num_classes so the model matches the
        # requested number of classes instead of always emitting 10 outputs.
        RegularizedDense(num_classes, activation="softmax",
                        kernel_initializer="glorot_uniform",
                        kernel_regularizer=None)  # No regularization on output layer
    ])
    return model

# Dropout Regularization
def create_dropout_model(input_shape, num_classes, dropout_rate=0.2):
    """
    Build a dense classifier with a Dropout layer in front of every Dense layer.

    Layout: Flatten -> [Dropout -> Dense(300, ELU)] -> [Dropout ->
    Dense(100, ELU)] -> Dropout -> softmax output.

    Args:
        input_shape: Shape of one input sample, forwarded to ``Flatten``.
        num_classes: Number of units in the softmax output layer.
        dropout_rate: Rate passed to every ``Dropout`` layer.

    Returns:
        An uncompiled ``keras.models.Sequential`` model.
    """
    stack = [keras.layers.Flatten(input_shape=input_shape)]

    for units in (300, 100):
        stack.append(keras.layers.Dropout(rate=dropout_rate))
        stack.append(
            keras.layers.Dense(units, activation="elu", kernel_initializer="he_normal")
        )

    stack.append(keras.layers.Dropout(rate=dropout_rate))
    stack.append(keras.layers.Dense(num_classes, activation="softmax"))
    return keras.models.Sequential(stack)

# Monte Carlo Dropout for uncertainty estimation
class MCDropout(keras.layers.Dropout):
    """
    Dropout variant for Monte Carlo Dropout: it always runs in training mode,
    so dropout is also applied when the model is used for inference.
    """
    def call(self, inputs, training=None):
        # The caller's `training` value is intentionally ignored; the parent
        # layer is always invoked with training=True.
        force_training = True
        return super().call(inputs, training=force_training)

def create_mc_dropout_model(input_shape, num_classes, dropout_rate=0.2):
    """
    Build a dense classifier that uses ``MCDropout`` in front of every Dense
    layer.

    Layout: Flatten -> [MCDropout -> Dense(300, ELU)] -> [MCDropout ->
    Dense(100, ELU)] -> MCDropout -> softmax output.

    Args:
        input_shape: Shape of one input sample, forwarded to ``Flatten``.
        num_classes: Number of units in the softmax output layer.
        dropout_rate: Rate passed to every ``MCDropout`` layer.

    Returns:
        An uncompiled ``keras.models.Sequential`` model.
    """
    stack = [keras.layers.Flatten(input_shape=input_shape)]

    for units in (300, 100):
        stack.append(MCDropout(rate=dropout_rate))
        stack.append(
            keras.layers.Dense(units, activation="elu", kernel_initializer="he_normal")
        )

    stack.append(MCDropout(rate=dropout_rate))
    stack.append(keras.layers.Dense(num_classes, activation="softmax"))
    return keras.models.Sequential(stack)

def mc_dropout_prediction(model, X_test, n_samples=100):
    """
    Aggregate repeated forward passes made with ``training=True``.

    The model is called ``n_samples`` times on ``X_test`` with
    ``training=True``; the outputs are stacked along a new leading axis and
    reduced over that axis.

    Args:
        model: Callable accepting ``(inputs, training=...)``.
        X_test: Inputs passed unchanged to every call.
        n_samples: Number of forward passes to run.

    Returns:
        A ``(y_proba, y_std)`` tuple: the mean and the standard deviation of
        the stacked outputs across the passes.
    """
    passes = []
    for _ in range(n_samples):
        passes.append(model(X_test, training=True))

    stacked = np.stack(passes)
    y_proba = stacked.mean(axis=0)
    y_std = stacked.std(axis=0)
    return y_proba, y_std

# Max Norm Constraint
def create_constrained_model(input_shape, num_classes, max_norm=1.0):
    """
    Build a dense classifier whose hidden kernels have a max-norm constraint.

    Layout: Flatten -> Dense(300, ELU) -> Dense(100, ELU) -> softmax output.
    Each hidden layer uses He-normal initialization and its own
    ``max_norm(max_norm)`` kernel constraint; the output layer has none.

    Args:
        input_shape: Shape of one input sample, forwarded to ``Flatten``.
        num_classes: Number of units in the softmax output layer.
        max_norm: Value passed to ``keras.constraints.max_norm``.

    Returns:
        An uncompiled ``keras.models.Sequential`` model.
    """
    stack = [keras.layers.Flatten(input_shape=input_shape)]

    for units in (300, 100):
        norm_cap = keras.constraints.max_norm(max_norm)
        stack.append(
            keras.layers.Dense(
                units,
                activation="elu",
                kernel_initializer="he_normal",
                kernel_constraint=norm_cap,
            )
        )

    stack.append(keras.layers.Dense(num_classes, activation="softmax"))
    return keras.models.Sequential(stack)

# ============================================================================
# 5. COMPLETE TRAINING PIPELINE
# ============================================================================

def create_comprehensive_model(input_shape, num_classes):
    """
    Build a classifier that combines batch normalization, L2 regularization,
    max-norm constraints and dropout.

    Layout: Flatten -> BatchNormalization -> two hidden stages (300 and 100
    units) -> softmax output. Each hidden stage is a bias-free Dense layer
    (He-normal init, L2 factor 0.01, max-norm 1.0) followed by
    BatchNormalization, ELU and Dropout(0.3).

    Args:
        input_shape: Shape of one input sample, forwarded to ``Flatten``.
        num_classes: Number of units in the softmax output layer.

    Returns:
        An uncompiled ``keras.models.Sequential`` model.
    """
    # Flatten the input and normalize it before the first hidden stage.
    stack = [
        keras.layers.Flatten(input_shape=input_shape),
        keras.layers.BatchNormalization(),
    ]

    for units in (300, 100):
        # Every stage gets its own regularizer and constraint instances.
        stack.append(
            keras.layers.Dense(
                units,
                kernel_initializer="he_normal",
                kernel_regularizer=keras.regularizers.l2(0.01),
                kernel_constraint=keras.constraints.max_norm(1.0),
                use_bias=False,
            )
        )
        stack.append(keras.layers.BatchNormalization())
        stack.append(keras.layers.Activation("elu"))
        stack.append(keras.layers.Dropout(0.3))

    stack.append(keras.layers.Dense(num_classes, activation="softmax"))
    return keras.models.Sequential(stack)

def train_comprehensive_model(X_train, y_train, X_valid, y_valid, input_shape, num_classes):
    """
    Build, compile and train the comprehensive model with a decaying Adam
    learning rate.

    The optimizer is Adam driven by an ``ExponentialDecay`` schedule (initial
    rate 0.001, ``decay_steps=1000``, ``decay_rate=0.96``, staircase mode).
    Training runs for up to 50 epochs with batch size 32, early stopping
    (patience 10, best weights restored) and best-only checkpointing to
    "best_model.h5".

    Args:
        X_train, y_train: Training inputs and labels.
        X_valid, y_valid: Validation inputs and labels.
        input_shape: Shape of one input sample.
        num_classes: Number of output classes.

    Returns:
        A ``(model, history)`` tuple: the trained model and the object
        returned by ``fit``.
    """
    net = create_comprehensive_model(input_shape, num_classes)

    # The learning rate is controlled by a schedule object owned by Adam.
    decaying_rate = keras.optimizers.schedules.ExponentialDecay(
        0.001,
        decay_steps=1000,
        decay_rate=0.96,
        staircase=True,
    )
    net.compile(
        loss="sparse_categorical_crossentropy",
        optimizer=keras.optimizers.Adam(learning_rate=decaying_rate),
        metrics=["accuracy"],
    )

    # No ReduceLROnPlateau here: the rate already comes from the schedule.
    stopper = keras.callbacks.EarlyStopping(patience=10, restore_best_weights=True)
    saver = keras.callbacks.ModelCheckpoint("best_model.h5", save_best_only=True)

    fit_log = net.fit(
        X_train,
        y_train,
        epochs=50,
        batch_size=32,
        validation_data=(X_valid, y_valid),
        callbacks=[stopper, saver],
        verbose=1,
    )
    return net, fit_log

def train_comprehensive_model_simple(X_train, y_train, X_valid, y_valid, input_shape, num_classes):
    """
    Build, compile and train the comprehensive model with a fixed-rate Adam
    optimizer plus ``ReduceLROnPlateau``.

    Adam starts at a constant learning rate of 0.001, which leaves the rate
    free to be adjusted by the ``ReduceLROnPlateau`` callback (factor 0.5,
    patience 5, minimum 1e-7). Training runs for up to 50 epochs with batch
    size 32, early stopping (patience 10, best weights restored) and
    best-only checkpointing to "best_model.h5".

    Args:
        X_train, y_train: Training inputs and labels.
        X_valid, y_valid: Validation inputs and labels.
        input_shape: Shape of one input sample.
        num_classes: Number of output classes.

    Returns:
        A ``(model, history)`` tuple: the trained model and the object
        returned by ``fit``.
    """
    net = create_comprehensive_model(input_shape, num_classes)

    net.compile(
        loss="sparse_categorical_crossentropy",
        optimizer=keras.optimizers.Adam(learning_rate=0.001),
        metrics=["accuracy"],
    )

    # Callback order matches the list handed to fit(): stop, reduce, save.
    stopper = keras.callbacks.EarlyStopping(patience=10, restore_best_weights=True)
    reducer = keras.callbacks.ReduceLROnPlateau(factor=0.5, patience=5, min_lr=1e-7)
    saver = keras.callbacks.ModelCheckpoint("best_model.h5", save_best_only=True)

    fit_log = net.fit(
        X_train,
        y_train,
        epochs=50,
        batch_size=32,
        validation_data=(X_valid, y_valid),
        callbacks=[stopper, reducer, saver],
        verbose=1,
    )
    return net, fit_log

# ============================================================================
# 6. EXAMPLE USAGE AND MAIN PROGRAM
# ============================================================================

def example_usage():
    """
    Run the complete pipeline end to end on MNIST.

    Loads MNIST, casts the inputs to float32 and divides them by 255.0, holds
    out the first 5000 training samples for validation, trains via
    ``train_comprehensive_model`` and prints the test accuracy.

    Returns:
        A ``(model, history)`` tuple from ``train_comprehensive_model``.
    """
    (train_inputs, train_labels), (X_test, y_test) = keras.datasets.mnist.load_data()

    # Scale both splits the same way.
    train_inputs = train_inputs.astype("float32") / 255.0
    X_test = X_test.astype("float32") / 255.0

    # First 5000 training samples become the validation set.
    holdout = 5000
    X_valid = train_inputs[:holdout]
    y_valid = train_labels[:holdout]
    X_train = train_inputs[holdout:]
    y_train = train_labels[holdout:]

    print("Training comprehensive model...")
    model, history = train_comprehensive_model(
        X_train, y_train, X_valid, y_valid, [28, 28], 10
    )

    # evaluate() returns the loss first, then the accuracy metric.
    _, test_acc = model.evaluate(X_test, y_test, verbose=0)
    print(f"Test accuracy: {test_acc:.4f}")

    return model, history

def main():
    """
    Main function - this is where the code that runs the models lives.

    Steps, in order:
      1. Load MNIST, scale it, and split off a 5000-sample validation set.
      2. Build the comprehensive model and print its summary.
      3. Train a fresh comprehensive model with
         ``train_comprehensive_model_simple`` and evaluate it on the test set.
      4. Save the trained model to "base_model.h5" and try to set up a
         transfer learning model from it (failures are printed, not raised).
      5. Instantiate several other model variants as a demonstration.

    Returns:
        The ``(model, history)`` tuple produced by the training in step 3.
    """
    print("=== Deep Neural Network Training Pipeline ===\n")

    # 1. Load and preprocess the data
    print("1. Loading and preprocessing data...")
    (X_train, y_train), (X_test, y_test) = keras.datasets.mnist.load_data()
    X_train = X_train.astype("float32") / 255.0
    X_test = X_test.astype("float32") / 255.0
    # The first 5000 training samples become the validation set.
    X_valid, X_train = X_train[:5000], X_train[5000:]
    y_valid, y_train = y_train[:5000], y_train[5000:]

    print(f"Training data shape: {X_train.shape}")
    print(f"Validation data shape: {X_valid.shape}")
    print(f"Test data shape: {X_test.shape}\n")

    # 2. EXAMPLE 1: Base model with anti-overfitting techniques
    # (built here only to display its architecture; it is replaced below)
    print("2. Creating comprehensive model with anti-overfitting techniques...")
    model = create_comprehensive_model([28, 28], 10)
    print("Model architecture:")
    model.summary()
    print()

    # 3. EXAMPLE 2: Full training with all techniques (simple version)
    print("3. Training model with all advanced techniques...")
    model, history = train_comprehensive_model_simple(
        X_train, y_train, X_valid, y_valid, [28, 28], 10
    )

    # Evaluate final model
    print("4. Evaluating model...")
    test_loss, test_acc = model.evaluate(X_test, y_test, verbose=0)
    print(f"Final test accuracy: {test_acc:.4f}\n")

    # 5. EXAMPLE 3: Transfer learning (requires an already existing model)
    print("5. Transfer Learning Example:")
    print("   Note: Untuk transfer learning, Anda perlu model yang sudah dilatih sebelumnya")
    print("   Simpan model saat ini untuk contoh:")

    # Save the model so it can serve as the transfer learning base
    model.save("base_model.h5")
    print("   Model saved as 'base_model.h5'")

    # Example transfer learning setup; this is a best-effort demonstration,
    # so any failure is reported and the rest of main() still runs.
    try:
        print("   Setting up transfer learning...")
        model_B, model_A_clone = setup_transfer_learning("base_model.h5", 2)
        print("   Transfer learning model created successfully!")
        print("   New model architecture for binary classification:")
        model_B.summary()
    except Exception as e:
        print(f"   Transfer learning setup failed: {e}")

    print("\n=== Training Complete ===")

    # 6. OTHER EXAMPLES: Demonstration of the various techniques
    # (the models below are only instantiated, not trained or returned)
    print("\n6. Additional Examples:")

    # Model with dropout
    print("   - Dropout model:")
    dropout_model = create_dropout_model([28, 28], 10, dropout_rate=0.3)

    # Model with L2 regularization
    print("   - L2 Regularized model:")
    l2_model = create_regularized_model([28, 28], 10, l2_reg=0.01)

    # Model with SELU activation
    print("   - SELU model:")
    selu_model = create_selu_model([28, 28], 10)

    # Monte Carlo Dropout
    print("   - Monte Carlo Dropout model:")
    mc_model = create_mc_dropout_model([28, 28], 10)

    print("   All model variants created successfully!")

    return model, history

# ============================================================================
# RUN THE MAIN PROGRAM
# ============================================================================

if __name__ == "__main__":
    # OPTION 2: Or run the specific examples below.

    # Load the data first: cast to float32 and divide by 255.0.
    (X_train, y_train), (X_test, y_test) = keras.datasets.mnist.load_data()
    X_train = X_train.astype("float32") / 255.0
    X_test = X_test.astype("float32") / 255.0

    # Hold out the first 5000 training samples as the validation set.
    X_valid = X_train[:5000]
    X_train = X_train[5000:]
    y_valid = y_train[:5000]
    y_train = y_train[5000:]

    # Base model with anti-overfitting techniques.
    model = create_comprehensive_model([28, 28], 10)
    print("Comprehensive model created!")

    # Full training with all techniques (replaces the model built above).
    model, history = train_comprehensive_model(
        X_train, y_train, X_valid, y_valid, [28, 28], 10
    )
    print("Model training completed!")

    # For transfer learning (make sure base_model.h5 exists):
    # model_B, model_A_clone = setup_transfer_learning("base_model.h5", 2)
    # print("Transfer learning setup completed!")

Comprehensive model created!
Epoch 1/50
[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.8363 - loss: 1.7799



[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 5ms/step - accuracy: 0.8363 - loss: 1.7795 - val_accuracy: 0.9080 - val_loss: 0.5857
Epoch 2/50
[1m1704/1719[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 2ms/step - accuracy: 0.8857 - loss: 0.6392



[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 3ms/step - accuracy: 0.8857 - loss: 0.6391 - val_accuracy: 0.9210 - val_loss: 0.5372
Epoch 3/50
[1m1706/1719[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 3ms/step - accuracy: 0.8922 - loss: 0.6061



[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.8922 - loss: 0.6060 - val_accuracy: 0.9258 - val_loss: 0.4922
Epoch 4/50
[1m1705/1719[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 2ms/step - accuracy: 0.9003 - loss: 0.5711



[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.9003 - loss: 0.5710 - val_accuracy: 0.9290 - val_loss: 0.4831
Epoch 5/50
[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9048 - loss: 0.5411



[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.9048 - loss: 0.5411 - val_accuracy: 0.9284 - val_loss: 0.4624
Epoch 6/50
[1m1715/1719[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 3ms/step - accuracy: 0.9088 - loss: 0.5212



[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - accuracy: 0.9088 - loss: 0.5211 - val_accuracy: 0.9334 - val_loss: 0.4248
Epoch 7/50
[1m1709/1719[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 3ms/step - accuracy: 0.9105 - loss: 0.4953



[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.9105 - loss: 0.4953 - val_accuracy: 0.9438 - val_loss: 0.3920
Epoch 8/50
[1m1701/1719[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 3ms/step - accuracy: 0.9177 - loss: 0.4649



[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.9177 - loss: 0.4649 - val_accuracy: 0.9454 - val_loss: 0.3818
Epoch 9/50
[1m1701/1719[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 3ms/step - accuracy: 0.9207 - loss: 0.4479



[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 3ms/step - accuracy: 0.9207 - loss: 0.4478 - val_accuracy: 0.9506 - val_loss: 0.3558
Epoch 10/50
[1m1713/1719[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 2ms/step - accuracy: 0.9224 - loss: 0.4268



[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.9224 - loss: 0.4268 - val_accuracy: 0.9492 - val_loss: 0.3458
Epoch 11/50
[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.9246 - loss: 0.4109



[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.9246 - loss: 0.4109 - val_accuracy: 0.9556 - val_loss: 0.3176
Epoch 12/50
[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.9299 - loss: 0.3904



[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.9299 - loss: 0.3904 - val_accuracy: 0.9544 - val_loss: 0.3077
Epoch 13/50
[1m1711/1719[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 3ms/step - accuracy: 0.9294 - loss: 0.3834



[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 3ms/step - accuracy: 0.9294 - loss: 0.3834 - val_accuracy: 0.9548 - val_loss: 0.3022
Epoch 14/50
[1m1717/1719[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 2ms/step - accuracy: 0.9319 - loss: 0.3663



[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.9319 - loss: 0.3663 - val_accuracy: 0.9584 - val_loss: 0.2825
Epoch 15/50
[1m1716/1719[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 3ms/step - accuracy: 0.9362 - loss: 0.3484



[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.9362 - loss: 0.3483 - val_accuracy: 0.9562 - val_loss: 0.2812
Epoch 16/50
[1m1704/1719[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 2ms/step - accuracy: 0.9365 - loss: 0.3374



[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.9365 - loss: 0.3373 - val_accuracy: 0.9570 - val_loss: 0.2680
Epoch 17/50
[1m1706/1719[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 2ms/step - accuracy: 0.9395 - loss: 0.3237



[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.9395 - loss: 0.3237 - val_accuracy: 0.9624 - val_loss: 0.2511
Epoch 18/50
[1m1716/1719[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 3ms/step - accuracy: 0.9403 - loss: 0.3132



[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.9403 - loss: 0.3132 - val_accuracy: 0.9620 - val_loss: 0.2492
Epoch 19/50
[1m1714/1719[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 3ms/step - accuracy: 0.9447 - loss: 0.2962



[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 3ms/step - accuracy: 0.9447 - loss: 0.2962 - val_accuracy: 0.9610 - val_loss: 0.2350
Epoch 20/50
[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.9440 - loss: 0.2889



[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.9440 - loss: 0.2889 - val_accuracy: 0.9652 - val_loss: 0.2262
Epoch 21/50
[1m1703/1719[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 2ms/step - accuracy: 0.9443 - loss: 0.2861



[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.9443 - loss: 0.2860 - val_accuracy: 0.9648 - val_loss: 0.2212
Epoch 22/50
[1m1703/1719[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 3ms/step - accuracy: 0.9476 - loss: 0.2719



[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.9476 - loss: 0.2718 - val_accuracy: 0.9648 - val_loss: 0.2115
Epoch 23/50
[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.9480 - loss: 0.2651 - val_accuracy: 0.9650 - val_loss: 0.2168
Epoch 24/50
[1m1704/1719[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 3ms/step - accuracy: 0.9491 - loss: 0.2589



[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.9491 - loss: 0.2589 - val_accuracy: 0.9684 - val_loss: 0.1994
Epoch 25/50
[1m1700/1719[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 3ms/step - accuracy: 0.9519 - loss: 0.2442



[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.9519 - loss: 0.2441 - val_accuracy: 0.9668 - val_loss: 0.1972
Epoch 26/50
[1m1707/1719[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 3ms/step - accuracy: 0.9536 - loss: 0.2351



[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - accuracy: 0.9536 - loss: 0.2351 - val_accuracy: 0.9690 - val_loss: 0.1882
Epoch 27/50
[1m1713/1719[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 3ms/step - accuracy: 0.9558 - loss: 0.2245



[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.9558 - loss: 0.2244 - val_accuracy: 0.9702 - val_loss: 0.1800
Epoch 28/50
[1m1714/1719[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 3ms/step - accuracy: 0.9560 - loss: 0.2196



[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.9560 - loss: 0.2196 - val_accuracy: 0.9722 - val_loss: 0.1767
Epoch 29/50
[1m1711/1719[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 3ms/step - accuracy: 0.9576 - loss: 0.2121



[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.9576 - loss: 0.2121 - val_accuracy: 0.9708 - val_loss: 0.1721
Epoch 30/50
[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.9573 - loss: 0.2092 - val_accuracy: 0.9716 - val_loss: 0.1724
Epoch 31/50
[1m1701/1719[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 3ms/step - accuracy: 0.9604 - loss: 0.2008



[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - accuracy: 0.9604 - loss: 0.2008 - val_accuracy: 0.9702 - val_loss: 0.1689
Epoch 32/50
[1m1708/1719[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 3ms/step - accuracy: 0.9603 - loss: 0.1944



[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 3ms/step - accuracy: 0.9603 - loss: 0.1944 - val_accuracy: 0.9720 - val_loss: 0.1640
Epoch 33/50
[1m1714/1719[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 2ms/step - accuracy: 0.9608 - loss: 0.1893



[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 3ms/step - accuracy: 0.9608 - loss: 0.1893 - val_accuracy: 0.9700 - val_loss: 0.1637
Epoch 34/50
[1m1712/1719[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 3ms/step - accuracy: 0.9631 - loss: 0.1827



[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.9631 - loss: 0.1827 - val_accuracy: 0.9748 - val_loss: 0.1540
Epoch 35/50
[1m1700/1719[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 2ms/step - accuracy: 0.9637 - loss: 0.1757



[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.9637 - loss: 0.1757 - val_accuracy: 0.9734 - val_loss: 0.1471
Epoch 36/50
[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - accuracy: 0.9687 - loss: 0.1618 - val_accuracy: 0.9730 - val_loss: 0.1516
Epoch 37/50
[1m1700/1719[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 3ms/step - accuracy: 0.9665 - loss: 0.1630



[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 3ms/step - accuracy: 0.9665 - loss: 0.1630 - val_accuracy: 0.9756 - val_loss: 0.1421
Epoch 38/50
[1m1718/1719[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 3ms/step - accuracy: 0.9681 - loss: 0.1573



[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.9681 - loss: 0.1573 - val_accuracy: 0.9752 - val_loss: 0.1361
Epoch 39/50
[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.9685 - loss: 0.1541 - val_accuracy: 0.9728 - val_loss: 0.1423
Epoch 40/50
[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - accuracy: 0.9687 - loss: 0.1463 - val_accuracy: 0.9742 - val_loss: 0.1369
Epoch 41/50
[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 3ms/step - accuracy: 0.9712 - loss: 0.1399 - val_accuracy: 0.9734 - val_loss: 0.1367
Epoch 42/50
[1m1709/1719[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 3ms/step - accuracy: 0.9714 - loss: 0.1371



[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.9714 - loss: 0.1371 - val_accuracy: 0.9740 - val_loss: 0.1340
Epoch 43/50
[1m1700/1719[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 3ms/step - accuracy: 0.9709 - loss: 0.1368



[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.9709 - loss: 0.1368 - val_accuracy: 0.9764 - val_loss: 0.1310
Epoch 44/50
[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.9698 - loss: 0.1331 - val_accuracy: 0.9764 - val_loss: 0.1324
Epoch 45/50
[1m1716/1719[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 3ms/step - accuracy: 0.9737 - loss: 0.1289



[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.9737 - loss: 0.1289 - val_accuracy: 0.9756 - val_loss: 0.1264
Epoch 46/50
[1m1712/1719[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 3ms/step - accuracy: 0.9721 - loss: 0.1260



[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - accuracy: 0.9721 - loss: 0.1260 - val_accuracy: 0.9776 - val_loss: 0.1232
Epoch 47/50
[1m1713/1719[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 2ms/step - accuracy: 0.9747 - loss: 0.1205



[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.9747 - loss: 0.1205 - val_accuracy: 0.9760 - val_loss: 0.1166
Epoch 48/50
[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.9761 - loss: 0.1151 - val_accuracy: 0.9752 - val_loss: 0.1200
Epoch 49/50
[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 3ms/step - accuracy: 0.9775 - loss: 0.1121 - val_accuracy: 0.9764 - val_loss: 0.1174
Epoch 50/50
[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - accuracy: 0.9766 - loss: 0.1097 - val_accuracy: 0.9760 - val_loss: 0.1172
Model training completed!
