In [14]:
import tensorflow as tf
import keras
from keras.optimizers import SGD, RMSprop, Adam, Lion

In [3]:
# Load CIFAR-10 through the Keras dataset helper; the result is unpacked into
# a (train images, train labels) pair and a (test images, test labels) pair.
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar10.load_data()

In [4]:
# Rescale pixel values by 1/255 (assumes 8-bit image data, giving a [0, 1] range).
# float32 is used instead of 'float' (NumPy float64): it halves the memory
# footprint and matches the default float type Keras computes in.
# NOTE: this cell rebinds x_train / x_test, so running it a second time without
# reloading the data divides by 255 again.
x_train = x_train.astype('float32') / 255
x_test = x_test.astype('float32') / 255

In [5]:
# Maps an integer class index (0-9) to its human-readable class name.
id2class = dict(enumerate((
    'airplane', 'automobile', 'bird',
    'cat', 'deer', 'dog', 'frog',
    'horse', 'ship', 'truck',
)))

In [6]:
# Width of each hidden Dense layer, in the order the layers are stacked.
DIM_PER_LAYER: list[int] = [1152, 576, 288]
# Number of passes over the training data for every optimizer run.
EPOCHS: int = 50

In [7]:
def construct_model(input_shape: tuple[int,...], dim_per_layer: list[int], n_classes: int) -> tf.keras.Model:
    """Build a fully connected classifier with the Keras functional API.

    The input is flattened and passed through one ReLU ``Dense`` layer per
    entry of ``dim_per_layer``, followed by a ``Dense`` head with
    ``n_classes`` units and no activation.

    Args:
        input_shape: Shape of a single input sample (without the batch axis).
        dim_per_layer: Number of units of each hidden layer, in stacking order.
        n_classes: Number of output units.

    Returns:
        An uncompiled ``tf.keras.Model`` mapping the input to the head output.
    """
    layers = tf.keras.layers

    inputs = layers.Input(shape=input_shape)
    hidden = layers.Flatten()(inputs)
    for units in dim_per_layer:
        hidden = layers.Dense(units, activation='relu')(hidden)

    # The head has no activation (activation='softmax' could be added here).
    outputs = layers.Dense(n_classes)(hidden)

    return tf.keras.Model(inputs=inputs, outputs=outputs)

In [12]:
def train_model(optimizer: keras.optimizers.Optimizer, batch_size: int) -> keras.callbacks.History:
    """Train a freshly built dense classifier and return its training history.

    A new model is constructed on every call (hidden sizes from
    ``DIM_PER_LAYER``, one output unit per entry of ``id2class``), compiled
    with the given optimizer, and fitted on the notebook-level
    ``x_train`` / ``y_train`` arrays for ``EPOCHS`` epochs. ``x_test`` /
    ``y_test`` are passed as validation data.

    Args:
        optimizer: Optimizer instance handed to ``model.compile``.
        batch_size: Mini-batch size handed to ``model.fit``.

    Returns:
        The ``History`` object returned by ``model.fit``.
    """
    # Take the per-sample shape from the data instead of hard-coding it, so the
    # model always matches whatever x_train holds.
    sample_shape = tuple(x_train.shape[1:])
    model = construct_model(sample_shape, dim_per_layer=DIM_PER_LAYER, n_classes=len(id2class))

    model.compile(
        optimizer=optimizer,
        # from_logits=True because the model's last Dense layer has no activation;
        # the sparse variant takes integer class labels.
        loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
        metrics=['accuracy']
    )

    hist = model.fit(
        x=x_train,
        y=y_train,
        epochs=EPOCHS,
        batch_size=batch_size,
        validation_data=(x_test, y_test)
    )
    return hist

### Stochastic Gradient Descent (SGD)

#### SGD - batch 32

In [None]:
# Plain SGD (momentum set to 0), mini-batches of 32.
hist_SGD_batch32 = train_model(
    optimizer=SGD(learning_rate=1e-2, momentum=0),
    batch_size=32,
)

#### SGD - batch 128

In [None]:
# Plain SGD (momentum set to 0), mini-batches of 128.
hist_SGD_batch128 = train_model(
    optimizer=SGD(learning_rate=1e-2, momentum=0),
    batch_size=128,
)

### Stochastic Gradient Descent with momentum (SGD with momentum)

#### SGD with momentum - batch 32

In [None]:
# SGD with momentum 0.9, mini-batches of 32.
hist_SGD_momentum_batch32 = train_model(
    optimizer=SGD(learning_rate=1e-3, momentum=0.9),
    batch_size=32,
)

#### SGD with momentum - batch 128

In [None]:
# SGD with momentum 0.9, mini-batches of 128.
# Stored under its own `_batch128` name: assigning this run to
# `hist_SGD_momentum_batch32` would overwrite the history of the batch-32 run.
hist_SGD_momentum_batch128 = train_model(SGD(learning_rate=1e-3, momentum=0.9), batch_size=128)

### Root Mean Square Propagation (RMSprop)

#### RMSprop - batch 32

In [None]:
# RMSprop with rho 0.9, mini-batches of 32.
# NOTE(review): the variable name contains `momentum`, but no momentum argument
# is passed to RMSprop here; the name is kept as-is in case other cells use it.
hist_RMSprop_momentum_batch32 = train_model(
    optimizer=RMSprop(learning_rate=1e-3, rho=0.9),
    batch_size=32,
)

#### RMSprop - batch 128

In [None]:
# RMSprop with rho 0.9, mini-batches of 128.
# NOTE(review): the variable name contains `momentum`, but no momentum argument
# is passed to RMSprop here; the name is kept as-is in case other cells use it.
hist_RMSprop_momentum_batch128 = train_model(
    optimizer=RMSprop(learning_rate=1e-3, rho=0.9),
    batch_size=128,
)

### Adaptive Moment Estimation (Adam)

#### Adam - batch 32

In [None]:
# Adam (beta_1 = 0.9, beta_2 = 0.999), mini-batches of 32.
hist_Adam_momentum_batch32 = train_model(
    optimizer=Adam(learning_rate=3e-4, beta_1=0.9, beta_2=0.999),
    batch_size=32,
)

#### Adam - batch 128

In [None]:
# Adam (beta_1 = 0.9, beta_2 = 0.999), mini-batches of 128.
hist_Adam_momentum_batch128 = train_model(
    optimizer=Adam(learning_rate=3e-4, beta_1=0.9, beta_2=0.999),
    batch_size=128,
)

### Lion

#### Lion - batch 32

In [None]:
# Lion with only the learning rate set explicitly, mini-batches of 32.
hist_Lion_momentum_batch32 = train_model(
    optimizer=Lion(learning_rate=3e-4),
    batch_size=32,
)

#### Lion - batch 128

In [None]:
# Lion with only the learning rate set explicitly, mini-batches of 128.
hist_Lion_momentum_batch128 = train_model(
    optimizer=Lion(learning_rate=3e-4),
    batch_size=128,
)