# EfficientNet PEFT on Cifar-10 with Cerebros

In [1]:
import sys
sys.path.insert(0, '../..')

In [2]:
import tensorflow as tf
import tensorflow_datasets as tfds
from tensorflow.keras.datasets import cifar10, cifar100
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Resizing, Lambda, Flatten, Dense
import pandas as pd
import numpy as np
from cerebros.simplecerebrosrandomsearch.simple_cerebros_random_search\
    import SimpleCerebrosRandomSearch
import pendulum
from cerebros.units.units import DenseUnit
from cerebros.denseautomlstructuralcomponent.dense_automl_structural_component\
    import zero_7_exp_decay, zero_95_exp_decay, simple_sigmoid
from ast import literal_eval

## Fine-tuning some layers of EfficientNet from scratch

This is done for comparison only and may be skipped. This section provides no information on Cerebros' efficiency.

Download EfficientNet (v.2, small model) with Imagenet weights (1000 classes)

In [3]:
# Pretrained EfficientNetV2-S with its original ImageNet classification head
# (include_top=True, 1000 softmax outputs). include_preprocessing=True keeps
# the input rescaling inside the model, so raw pixel values can be fed in.
enet = tf.keras.applications.efficientnet_v2.EfficientNetV2S(
    include_top=True,
    weights='imagenet',
    input_tensor=None,
    input_shape=None,
    pooling=None,
    classes=1000,
    classifier_activation='softmax',
    include_preprocessing=True
)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/efficientnet_v2/efficientnetv2-s.h5


In [4]:
# Print the layer table (layer names, output shapes, parameter counts).
enet.summary()

Model: "efficientnetv2-s"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_1 (InputLayer)        [(None, 384, 384, 3)]        0         []                            
                                                                                                  
 rescaling (Rescaling)       (None, 384, 384, 3)          0         ['input_1[0][0]']             
                                                                                                  
 stem_conv (Conv2D)          (None, 192, 192, 24)         648       ['rescaling[0][0]']           
                                                                                                  
 stem_bn (BatchNormalizatio  (None, 192, 192, 24)         96        ['stem_conv[0][0]']           
 n)                                                                                

Make all layers untrainable except for the very last convolutional layer

In [5]:
# Freeze every layer of the pretrained network ...
for layer in enet.layers:
    layer.trainable = False
# ... then unfreeze a single layer. NOTE(review): index -6 is intended to be
# the last convolutional layer (per the notebook text) — confirm against
# enet.summary() if the Keras version changes.
enet.layers[-6].trainable  = True

Download and prepare Cifar-10 data

In [6]:
# Load the CIFAR-10 train/test splits as (images, labels) pairs.
(X_train, y_train), (X_test, y_test) = cifar10.load_data()

Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz


In [7]:
# One-hot encode the CIFAR-10 labels into 1000-wide vectors so that they match
# the unchanged 1000-unit ImageNet head of the model built above.
y_train_cat, y_test_cat = (
    to_categorical(labels, 1000) for labels in (y_train, y_test)
)

Resizing images to meet EfficientNet's input shape

In [8]:
def resize(x):
    """Upscale images to 384x384 with bilinear interpolation.

    384x384 is the spatial input size of the EfficientNetV2-S model used in
    this notebook.
    """
    target_size = (384, 384)
    return tf.image.resize(x, size=target_size, method='bilinear')

Modify the model

In [9]:
# Shape of the images fed to the model: 32x32 pixels, 3 channels.
input_shape = (32, 32, 3)

In [10]:
# Wrap the pretrained network so it accepts 32x32 images directly:
# input -> upscale to 384x384 (resize) -> EfficientNet -> 1000-way softmax.
input_layer = Input(shape=input_shape)
prep = Lambda(resize)(input_layer)
out = enet(prep)
enet_mod = Model(inputs=input_layer, outputs=out)

In [11]:
# Categorical cross-entropy on the one-hot labels; top-1 accuracy is the only
# metric reported besides the loss.
enet_mod.compile(
    optimizer='adam',
    loss=tf.keras.losses.CategoricalCrossentropy(),
    metrics=[
        tf.keras.metrics.TopKCategoricalAccuracy(
            k=1,
            name='top_1_categorical_accuracy',
        ),
    ],
)

Fit it on the complete CIFAR-10 training set and then evaluate it on the test set (training on the complete dataset should be enough to reach a good accuracy).

In [12]:
# Train on the full training set. No epochs or batch_size are passed, so the
# Keras defaults apply.
enet_mod.fit(X_train, y_train_cat)



<keras.src.callbacks.History at 0x7c81d1aebee0>

In [13]:
# Returns [loss, top-1 accuracy] on the test set (the metric configured in compile()).
enet_mod.evaluate(X_test, y_test_cat)



[0.2743999660015106, 0.9056000113487244]

## PEFT with Cerebros

Download Cifar-10 data

In [14]:
# Load CIFAR-10 again so this section also runs when the optional comparison
# section above has been skipped.
(X_train, y_train), (X_test, y_test) = cifar10.load_data()

In [15]:
# One 32x32x3 image input and one 10-class output; both are passed to the
# search as lists.
input_shape = (32, 32, 3)
INPUT_SHAPES, OUTPUT_SHAPES = [input_shape], [10]

Subsampling a small balanced set of samples (with or without shuffling) from the train dataset and train labels

In [16]:
def subsample_train(X_train, y_train, num_samples, num_classes=10,
                    shuffle=True, seed=None):
    """Draw a class-balanced subset from a labelled training set.

    For each class id in ``range(num_classes)`` the first ``num_samples``
    matching examples (in dataset order) are kept. The per-class pieces are
    concatenated and, if requested, shuffled jointly so that samples and
    labels stay aligned.

    Parameters
    ----------
    X_train : np.ndarray
        Samples, indexed along axis 0.
    y_train : np.ndarray
        Integer class labels of shape ``(N, 1)`` (as returned by
        ``cifar10.load_data()``) or ``(N,)``.
    num_samples : int
        Number of examples to keep per class; must satisfy
        ``1 <= num_samples <= 5000``. A class with fewer matching examples
        contributes only what it has.
    num_classes : int, optional
        Number of classes; label ids ``0 .. num_classes - 1`` are sampled.
        Defaults to 10.
    shuffle : bool, optional
        Shuffle the resulting subset (default ``True``, the original
        behaviour). With ``False`` the subset is returned grouped by class.
    seed : int or None, optional
        Seed for a reproducible shuffle. ``None`` (default) uses NumPy's
        global random state, exactly as before.

    Returns
    -------
    tuple of np.ndarray
        ``(X_sub, y_sub)`` with matching length along axis 0.

    Raises
    ------
    AssertionError
        If ``num_samples`` is out of range or the collected samples and
        labels differ in length.
    """
    assert 1 <= num_samples <= 5000

    X_parts = []
    y_parts = []
    for cat in range(num_classes):
        # Row indices of this class; taking element [0] of the np.where result
        # works for both (N,) and (N, 1) label arrays.
        rows = np.where(y_train == cat)[0]
        X_parts.append(X_train[rows][:num_samples])
        y_parts.append(y_train[rows][:num_samples])

    # Concatenating along axis 0 keeps the label array's rank:
    # (N, 1) stays 2-D and (N,) stays 1-D.
    X_sub = np.concatenate(X_parts, axis=0)
    y_sub = np.concatenate(y_parts, axis=0)

    assert X_sub.shape[0] == y_sub.shape[0]

    if not shuffle:
        return X_sub, y_sub

    if seed is None:
        # Legacy path: draw from the global NumPy random state.
        order = np.arange(X_sub.shape[0])
        np.random.shuffle(order)
    else:
        order = np.random.default_rng(seed).permutation(X_sub.shape[0])

    # The same permutation is applied to both arrays to keep pairs aligned.
    return X_sub[order], y_sub[order]

We take only 200 samples in each category (out of 5000)

In [17]:
# Keep 200 of the 5000 training images per class (2000 images in total).
num_samples = 200
X_sub, y_sub = subsample_train(X_train, y_train, num_samples)

Preparing tensors for the training set and labels

In [18]:
# Inputs and labels are handed to the search as single-element lists of
# tensors, mirroring the single-entry INPUT_SHAPES / OUTPUT_SHAPES lists.
training_x   = [tf.constant(X_sub)]
# One-hot encode the subsampled labels over the 10 CIFAR-10 classes.
y_train_cat  = to_categorical(y_sub, 10)
train_labels = [tf.constant(y_train_cat)]

Downloading EfficientNet (v.2, small)

In [19]:
# Fresh copy of the pretrained EfficientNetV2-S (ImageNet weights, original
# 1000-class head, built-in input preprocessing). The head is cut off further
# below when the base model for the search is assembled.
enet = tf.keras.applications.efficientnet_v2.EfficientNetV2S(
    include_top=True,
    weights='imagenet',
    input_tensor=None,
    input_shape=None,
    pooling=None,
    classes=1000,
    classifier_activation='softmax',
    include_preprocessing=True
)

Resizing images to meet EfficientNet's input shape

In [20]:
# Defined again here so that this section runs on its own when the optional
# comparison section has been skipped.
def resize(x):
    """Upscale images to 384x384 with bilinear interpolation."""
    target_size = (384, 384)
    return tf.image.resize(x, size=target_size, method='bilinear')

Make only the last convolutional layer trainable

In [21]:
# Freeze the whole pretrained network, then unfreeze one layer.
for layer in enet.layers:
    layer.trainable = False
# NOTE(review): index -6 is intended to be the last convolutional layer (per
# the notebook text) — confirm against enet.summary().
enet.layers[-6].trainable = True

Preparing the base model for Cerebros search

In [22]:
# Feature extractor: same input as the pretrained network, but the output is
# taken three layers before the end, i.e. the last two layers (including the
# 1000-class prediction layer) are dropped.
# NOTE(review): presumably layers[-3] is the global pooling layer — confirm.
enet_io = Model(inputs=enet.layers[0].input,
                outputs=enet.layers[-3].output)

In [23]:
# Base model handed to Cerebros:
# 32x32 image -> upscale to 384x384 -> EfficientNet features -> flat vector.
input_layer = Input(shape=input_shape)
prep = Lambda(resize)(input_layer)
out = Flatten()(enet_io(prep))
base_mod = Model(inputs=input_layer, outputs=out)

In [24]:
# --- Training settings ------------------------------------------------------
activation = 'swish'
learning_rate = 0.001
batch_size = 20
epochs = 5  # [1, 100] (presumably the sensible range — TODO confirm)

# --- Upper bounds of the architecture search space --------------------------
# The bracketed intervals are the author's notes, presumably suggested ranges.
maximum_levels = 6  # [3,7]
maximum_units_per_level = 5  # [2,10]
maximum_neurons_per_unit = 4  # [2,20]

# --- Connectivity settings forwarded to SimpleCerebrosRandomSearch ----------
predecessor_level_connection_affinity_factor_first = 2.0
predecessor_level_connection_affinity_factor_main = 0.97
max_consecutive_lateral_connections = 5
p_lateral_connection = 0.97
num_lateral_connection_tries_per_unit = 2

In [25]:
# Final training task
# Minute-resolution timestamp (New York time) formatted as YYYY_MM_DD_HH_MM.
# strftime spells the format out explicitly instead of slicing and patching
# the object's string representation, so the result does not depend on how
# the library renders a datetime as text.
TIME = pendulum.now(tz='America/New_York').strftime('%Y_%m_%d_%H_%M')
#
# The timestamp prefix gives each run its own project name.
PROJECT_NAME = f'{TIME}_cerebros_auto_ml_test_cifar10_efficientnet'
#
# Identifier of this meta-trial; it is appended to the project name and also
# passed to the search object.
meta_trial_number = 42
#
# Configure the Cerebros random architecture search on top of the frozen
# EfficientNet feature extractor (base_mod). Candidates are ranked by their
# validation top-1 accuracy (higher is better).
cerebros_automl = SimpleCerebrosRandomSearch(
    unit_type=DenseUnit,
    input_shapes=INPUT_SHAPES,
    output_shapes=OUTPUT_SHAPES,
    # Data: the balanced subsample, with 20% held out for validation.
    training_data=training_x,
    labels=train_labels,
    validation_split=0.2,
    direction='maximize',
    # "val_" prefix + the name of the metric defined in `metrics` below.
    metric_to_rank_by="val_top_1_categorical_accuracy",
    # Bounds of the search space (levels / units per level / neurons per unit).
    minimum_levels=2,
    maximum_levels=maximum_levels,
    minimum_units_per_level=1,
    maximum_units_per_level=maximum_units_per_level,
    minimum_neurons_per_unit=1,
    maximum_neurons_per_unit=maximum_neurons_per_unit,
    activation=activation,
    final_activation='softmax',
    number_of_architecture_moities_to_try=3,
    number_of_tries_per_architecture_moity=2,
    # Connectivity settings (skip and lateral connections).
    minimum_skip_connection_depth=1,
    maximum_skip_connection_depth=7,
    predecessor_level_connection_affinity_factor_first=predecessor_level_connection_affinity_factor_first,
    predecessor_level_connection_affinity_factor_first_rounding_rule='ceil',
    predecessor_level_connection_affinity_factor_main=predecessor_level_connection_affinity_factor_main,
    predecessor_level_connection_affinity_factor_main_rounding_rule='ceil',
    predecessor_level_connection_affinity_factor_decay_main=zero_7_exp_decay,
    seed=8675309,
    max_consecutive_lateral_connections=max_consecutive_lateral_connections,
    gate_after_n_lateral_connections=3,
    gate_activation_function=simple_sigmoid,
    p_lateral_connection=p_lateral_connection,
    p_lateral_connection_decay=zero_95_exp_decay,
    num_lateral_connection_tries_per_unit=num_lateral_connection_tries_per_unit,
    # Training settings used for every candidate network.
    learning_rate=learning_rate,
    loss=tf.keras.losses.CategoricalCrossentropy(),
    metrics=[tf.keras.metrics.TopKCategoricalAccuracy(
                k=1, name='top_1_categorical_accuracy')
             ],
    epochs=epochs,
    project_name=f"{PROJECT_NAME}_meta_{meta_trial_number}",
    model_graphs='model_graphs',
    batch_size=batch_size,
    meta_trial_number=meta_trial_number,
    # NOTE(review): presumably each candidate network is built on top of this
    # base model's output — confirm against the Cerebros documentation.
    base_models=[base_mod])

In [26]:
%%time
# Run the search. The returned value is reported below as the best accuracy
# achieved (presumably the best value of the ranking metric — TODO confirm).
result = cerebros_automl.run_random_search()

SimpleCerebrosRandomSearch.input_shapes: [(32, 32, 3)]
nan
>nnf>ceil
k is: 0 value is: [{'1': <class 'cerebros.units.units.InputUnit'>}]
0
k is: 1 value is: [{'2': <class 'cerebros.units.units.DenseUnit'>}, {'4': <class 'cerebros.units.units.DenseUnit'>}, {'2': <class 'cerebros.units.units.DenseUnit'>}]
1
Trying to create level 1
We think level 1's predecessors are: [0]
k is: 2 value is: [{'10': <class 'cerebros.units.units.FinalDenseUnit'>}]
2
Trying to create Final level 2
Trying to create level 2
We think level final level 2's predecessors are: [0, 1]
levels:
[0, 1, 2]
{'0': 'InputUnitModule'}
InputLevel.input_shapes [(32, 32, 3)]
{'2': <class 'cerebros.units.units.DenseUnit'>}
{'4': <class 'cerebros.units.units.DenseUnit'>}
{'2': <class 'cerebros.units.units.DenseUnit'>}
Debug: I am 2 selecting 1
debug: meta_level_number
debug: meta_level_number
debug: meta_level_number
debug: meta_level_number
Setting levels_unmaterialized[0] level_number 0 to have first successor: levels_unmateri

In [27]:
# Report the value returned by the search together with the metric's name.
print(
    f'Best accuracy achieved is {result}',
    'top-1 categorical accuracy',
    sep='\n',
)

Best accuracy achieved is 0.875
top-1 categorical accuracy


Evaluating the best model found

In [29]:
# Reload the best model from the path recorded by the search.
# (Alternative left by the author: cerebros_automl.get_best_model().)
# safe_mode=False allows Keras to deserialize Lambda layers; the base model
# contains one wrapping resize(). Only load model files you trust, since this
# can execute arbitrary code.
best_model_found = tf.keras.models.load_model(
    cerebros_automl.best_model_path,
    safe_mode=False,
)

In [30]:
# Loss and metrics for evaluating the reloaded model: categorical
# cross-entropy plus top-1 and top-5 accuracy.
eval_loss = tf.keras.losses.CategoricalCrossentropy()
eval_metrics = [
    tf.keras.metrics.TopKCategoricalAccuracy(
        k=top_k,
        name=f'eval_top_{top_k}_categorical_accuracy',
    )
    for top_k in (1, 5)
]

In [31]:
# Attach the evaluation loss and metrics to the reloaded model (no optimizer
# is passed here), then print its architecture.
best_model_found.compile(loss=eval_loss, metrics=eval_metrics)
best_model_found.summary()

Model: "NeuralNetworkFuture_0000000000000nan_tr_1_nn_materialized"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 NeuralNetworkFuture_000000  [(None, 32, 32, 3)]          0         []                            
 0000000nan_tr_1_InputLevel                                                                       
 _0000000000000000_tr_1_Inp                                                                       
 utUnit_0000000000000000_tr                                                                       
 _1_0_inp (InputLayer)                                                                            
                                                                                                  
 model_2 (Functional)        (None, 1280)                 2033136   ['NeuralNetworkFuture_00000000
                                          

In [32]:
print("Evaluating best model found ...")
# Header for the list returned by evaluate(): loss first, then the two
# metrics in the order they were passed to compile().
print("Loss | Top-1 accuracy | Top-5 accuracy")
# One-hot encode the test labels over the 10 CIFAR-10 classes.
y_test_cat = to_categorical(y_test, 10)
best_model_found.evaluate(X_test, y_test_cat)

Evaluating best model found ...
Loss | Top-1 accuracy | Top-5 accuracy


[0.5334373116493225, 0.870199978351593, 0.9911999702453613]

Now train the best model on the entire train dataset

In [33]:
# One-hot labels for the complete training set (10 classes).
y_train_cat = to_categorical(y_train, 10)

# Training configuration for the full-data run: Adam with learning rate
# 0.0005, categorical cross-entropy, top-1 and top-5 accuracy.
optimizer = Adam(learning_rate=0.0005)
loss = tf.keras.losses.CategoricalCrossentropy()
metrics = [
    tf.keras.metrics.TopKCategoricalAccuracy(
        k=top_k,
        name=f'eval_top_{top_k}_categorical_accuracy',
    )
    for top_k in (1, 5)
]

# Recompile so the model picks up the optimizer, loss and metrics above.
best_model_found.compile(
    optimizer=optimizer,
    loss=loss,
    metrics=metrics,
)

In [34]:
%%time
# Train for 6 epochs on the complete training set, with 35% of it held out
# for validation. No batch_size is given, so the Keras default applies.
best_model_found.fit(X_train,
                     y_train_cat,
                     validation_split=0.35,
                     epochs=6,
                    )

Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6
CPU times: user 6min 45s, sys: 1min 48s, total: 8min 34s
Wall time: 19min 33s


<keras.src.callbacks.History at 0x7c8138ea43a0>

Evaluating again

In [35]:
print("Evaluating best model found ...")
print("Loss | Top-1 accuracy | Top-5 accuracy")
# Reuses y_test_cat (10-class one-hot test labels) from the earlier evaluation cell.
best_model_found.evaluate(X_test, y_test_cat)

Evaluating best model found ...
Loss | Top-1 accuracy | Top-5 accuracy


[0.31209999322891235, 0.9143000245094299, 0.9968000054359436]