# RegNetX016 PEFT on Cifar-10 with Cerebros

In [None]:
from google.colab import drive
drive.mount('/content/drive')
%cd drive/MyDrive/Colab\ Notebooks/cerebros-core-algorithm-alpha
import sys
sys.path.insert(0, '../..')
!pip install -r requirements.txt

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
/content/drive/MyDrive/Colab Notebooks/cerebros-core-algorithm-alpha


In [None]:
import tensorflow as tf
import tensorflow_datasets as tfds
from tensorflow.keras.datasets import cifar10, cifar100
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Resizing, Lambda, Flatten, Dense
import pandas as pd
import numpy as np
from cerebros.simplecerebrosrandomsearch.simple_cerebros_random_search\
    import SimpleCerebrosRandomSearch
import pendulum
from cerebros.units.units import DenseUnit
from cerebros.denseautomlstructuralcomponent.dense_automl_structural_component\
    import zero_7_exp_decay, zero_95_exp_decay, simple_sigmoid
from ast import literal_eval

## Fine-tuning some layers of RegNet from scratch

This is done for comparison only and may be skipped; this section provides no information on Cerebros' efficiency.

Download RegNetX016 with Imagenet weights (1000 classes)

In [None]:
# ImageNet-pretrained RegNetX016 including its 1000-way softmax head and its
# built-in preprocessing; every constructor option is spelled out explicitly.
regnet = tf.keras.applications.regnet.RegNetX016(
    model_name='regnetx016',
    include_top=True,
    include_preprocessing=True,
    weights='imagenet',
    input_tensor=None,
    input_shape=None,
    pooling=None,
    classes=1000,
    classifier_activation='softmax',
)

In [None]:
# Print the layer-by-layer architecture of the pretrained network.
regnet.summary()

Model: "regnetx016"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_5 (InputLayer)        [(None, 224, 224, 3)]        0         []                            
                                                                                                  
 regnetx016_prestem_rescali  (None, 224, 224, 3)          0         ['input_5[0][0]']             
 ng (Rescaling)                                                                                   
                                                                                                  
 regnetx016_stem_conv (Conv  (None, 112, 112, 32)         864       ['regnetx016_prestem_rescaling
 2D)                                                                [0][0]']                      
                                                                                         

Make all layers untrainable except for the last convolutional layers (here `regnet.layers[-9]` and `regnet.layers[-6]`)

In [None]:
# Freeze the whole network first ...
for frozen_layer in regnet.layers:
    frozen_layer.trainable = False
# ... then re-enable training for the two layers at positions -9 and -6.
for position in (-9, -6):
    regnet.layers[position].trainable = True

Cifar-10 testing

In [None]:
# Load the CIFAR-10 train/test images and their class labels through Keras.
(X_train, y_train), (X_test, y_test) = cifar10.load_data()

In [None]:
# One-hot encode over 1000 classes so the targets match the width of the
# unmodified ImageNet head (classes=1000) used in this comparison section.
y_train_cat = to_categorical(y_train, 1000)
y_test_cat = to_categorical(y_test, 1000)

Lambda layer for preprocessing

In [None]:
def resize(x):
    """Resize the image tensor ``x`` to 224x224 pixels with bicubic interpolation."""
    target_size = (224, 224)
    return tf.image.resize(x, size=target_size, method='bicubic')

Modify the model

In [None]:
# Shape of a single input image fed to the wrapped model: 32x32 pixels, 3 channels.
input_shape = (32,32,3)

In [None]:
# Wrap RegNet so it accepts 32x32 inputs: Input -> resize to 224x224 -> RegNet.
input_layer = Input(shape=input_shape)
prep = Lambda(resize)(input_layer)
out = regnet(prep)
regnet_mod = Model(inputs=input_layer, outputs=out)

In [None]:
# Adam optimizer, categorical cross-entropy loss, top-1 accuracy as the metric.
top_1_metric = tf.keras.metrics.TopKCategoricalAccuracy(
    k=1, name='top_1_categorical_accuracy')
regnet_mod.compile(
    optimizer='adam',
    loss=tf.keras.losses.CategoricalCrossentropy(),
    metrics=[top_1_metric],
)

In [None]:
# Show the wrapped model, including trainable vs. non-trainable parameter counts.
regnet_mod.summary()

Model: "model_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_6 (InputLayer)        [(None, 32, 32, 3)]       0         
                                                                 
 lambda_2 (Lambda)           (None, 224, 224, 3)       0         
                                                                 
 regnetx016 (Functional)     (None, 1000)              9233640   
                                                                 
Total params: 9233640 (35.22 MB)
Trainable params: 1028736 (3.92 MB)
Non-trainable params: 8204904 (31.30 MB)
_________________________________________________________________


Try to fit it on Cifar-10 data and then evaluate (there is no hope this is gonna work ...)

In [None]:
# Train with fit()'s default settings (no epochs or batch size are passed).
regnet_mod.fit(X_train, y_train_cat)



<keras.src.callbacks.History at 0x781585951a50>

In [None]:
# Score on the test set; the result is [loss, top-1 categorical accuracy].
regnet_mod.evaluate(X_test, y_test_cat)



[4.857247829437256, 0.0]

## PEFT with Cerebros

Download Cifar-10 data

In [None]:
# Reload CIFAR-10 so this section can be run without the comparison section above.
(X_train, y_train), (X_test, y_test) = cifar10.load_data()

Input and output shapes

In [None]:
input_shape   = (32,32,3)  # shape of one input image
INPUT_SHAPES  = [input_shape]  # passed as input_shapes to the Cerebros search
OUTPUT_SHAPES = [10]  # passed as output_shapes; labels are one-hot over 10 classes

Subsampling a small, balanced, randomly shuffled set of samples from the training images and training labels

In [None]:
def subsample_train(X_train, y_train, num_samples, num_classes=10,
                    shuffle=True, seed=None):
    """Draw a class-balanced subset of a labelled training set.

    For every class id in ``range(num_classes)``, ``num_samples`` examples are
    picked, and the per-class picks are concatenated into one dataset.

    Parameters
    ----------
    X_train : array-like
        Samples, indexed along the first axis.
    y_train : array-like
        Integer class ids, one per sample, shaped ``(N,)`` or ``(N, 1)``.
    num_samples : int
        Number of examples to keep per class; at least 1 and no more than the
        number of examples available in the smallest class.
    num_classes : int, optional
        Number of classes; ids are taken to be ``0 .. num_classes - 1``.
    shuffle : bool, optional
        When True (default), examples are chosen at random within each class
        and the final subset is shuffled. When False, the first
        ``num_samples`` examples of each class are returned, grouped by class.
    seed : int or None, optional
        Seed of a private random generator, for reproducible subsets. When
        None (default), NumPy's global random state is used.

    Returns
    -------
    tuple of numpy.ndarray
        ``(X_sub, y_sub)`` with ``num_classes * num_samples`` rows each;
        ``y_sub`` keeps the layout of ``y_train`` (``(M,)`` or ``(M, 1)``).

    Raises
    ------
    AssertionError
        If the labels do not line up with the samples, ``num_samples`` is
        smaller than 1, or some class has fewer than ``num_samples`` examples.
    """
    X_train = np.asarray(X_train)
    y_train = np.asarray(y_train)
    # Work on a flat view of the labels so (N,) and (N, 1) are both accepted.
    flat_labels = y_train.reshape(-1)
    assert flat_labels.shape[0] == X_train.shape[0], \
        "y_train must hold exactly one class id per sample"
    assert num_samples >= 1, "num_samples must be at least 1"
    # seed=None -> NumPy's global random state; otherwise a private generator.
    rng = np.random if seed is None else np.random.RandomState(seed)

    X_parts = []
    y_parts = []
    for cat in range(num_classes):
        ind = np.flatnonzero(flat_labels == cat)
        # Refuse to return an unbalanced subset when a class is too small.
        assert ind.size >= num_samples, (
            f"class {cat} has only {ind.size} samples, "
            f"cannot draw {num_samples}")
        if shuffle:
            rng.shuffle(ind)
        # Slice the indices before indexing so only the kept rows are copied.
        ind = ind[:num_samples]
        X_parts.append(X_train[ind])
        y_parts.append(y_train[ind])

    X_sub = np.concatenate(X_parts, axis=0)
    y_sub = np.concatenate(y_parts, axis=0)
    assert X_sub.shape[0] == y_sub.shape[0]

    if shuffle:
        # Mix the classes, applying one permutation to samples and labels alike.
        order = np.arange(X_sub.shape[0])
        rng.shuffle(order)
        X_sub, y_sub = X_sub[order], y_sub[order]
    return X_sub, y_sub

We take only 1000 samples in each category (out of 5000)

In [None]:
num_samples = 1000  # number of training images kept per class
X_sub, y_sub = subsample_train(X_train, y_train, num_samples)

Preparing tensors for the training set and labels

In [None]:
# The search below is given lists of tensors (here: one input, one label tensor).
training_x   = [tf.constant(X_sub)]
y_train_cat  = to_categorical(y_sub, 10)  # one-hot over the 10 classes
train_labels = [tf.constant(y_train_cat)]

Downloading RegNetX016

In [None]:
# Fresh ImageNet-pretrained RegNetX016 (1000-way softmax head, built-in
# preprocessing) to serve as the backbone for the Cerebros search.
regnet = tf.keras.applications.regnet.RegNetX016(
    model_name='regnetx016',
    include_top=True,
    include_preprocessing=True,
    weights='imagenet',
    input_tensor=None,
    input_shape=None,
    pooling=None,
    classes=1000,
    classifier_activation='softmax',
)

Preprocessing images for RegNet

In [None]:
def resize(x):
    """Resize the image tensor ``x`` to 224x224 pixels with bicubic interpolation."""
    regnet_input_size = (224, 224)
    return tf.image.resize(x, size=regnet_input_size, method='bicubic')

Make only the last convolutional layers trainable (`regnet.layers[-9]` and `regnet.layers[-6]`)

In [None]:
# Start from a fully frozen backbone ...
for backbone_layer in regnet.layers:
    backbone_layer.trainable = False
# ... and unfreeze only the layers at positions -9 and -6.
for unfrozen_index in (-9, -6):
    regnet.layers[unfrozen_index].trainable = True

Preparing the base model for Cerebros search

In [None]:
# Feature extractor: same input as RegNet, output taken from the
# second-to-last layer, i.e. the final layer of the network is left out.
regnet_io = Model(inputs=regnet.layers[0].input,
                  outputs=regnet.layers[-2].output)

In [None]:
# Base model handed to Cerebros: 32x32 input -> resize -> RegNet features -> flatten.
input_layer = Input(shape=input_shape)
prep = Lambda(resize)(input_layer)
out = Flatten()(regnet_io(prep))
base_mod = Model(inputs=input_layer, outputs=out)

In [None]:
# Settings forwarded to SimpleCerebrosRandomSearch in the next cell.
# NOTE(review): the bracketed trailing comments look like suggested value
# ranges for the setting on that line — confirm.
activation = 'swish'
predecessor_level_connection_affinity_factor_first = 2.0
predecessor_level_connection_affinity_factor_main = 0.97
max_consecutive_lateral_connections = 5
p_lateral_connection = 0.97
num_lateral_connection_tries_per_unit = 2
learning_rate = 0.001
epochs = 5  # [1, 100]
batch_size = 20
maximum_levels = 6  # [3,7]
maximum_units_per_level = 7  # [2,10]
maximum_neurons_per_unit = 5  # [2,20]

In [None]:
# Final training task
# Minute-resolution New York timestamp with 'T', ':' and '-' turned into
# underscores (e.g. 2023_11_03_17_05); it prefixes the project name.
_now = pendulum.now(tz='America/New_York')
TIME = str(_now)[:16].translate(str.maketrans('T:-', '___'))
#
PROJECT_NAME = f'{TIME}_cerebros_auto_ml_test_cifar10_regnet'
#
meta_trial_number = 42
#
# Configure the Cerebros random search that builds dense networks on top of
# the partially frozen RegNet feature extractor (passed in as base_models).
cerebros_automl = SimpleCerebrosRandomSearch(
    unit_type=DenseUnit,
    input_shapes=INPUT_SHAPES,
    output_shapes=OUTPUT_SHAPES,
    training_data=training_x,
    labels=train_labels,
    validation_split=0.2,
    direction='maximize',
    # 'val_' + the name given to the metric in `metrics` below.
    metric_to_rank_by="val_top_1_categorical_accuracy",
    minimum_levels=2,
    maximum_levels=maximum_levels,
    minimum_units_per_level=1,
    maximum_units_per_level=maximum_units_per_level,
    minimum_neurons_per_unit=1,
    maximum_neurons_per_unit=maximum_neurons_per_unit,
    activation=activation,
    final_activation='softmax',
    # NOTE(review): presumably 3 architectures x 2 tries each — confirm.
    number_of_architecture_moities_to_try=3,
    number_of_tries_per_architecture_moity=2,
    minimum_skip_connection_depth=1,
    maximum_skip_connection_depth=7,
    predecessor_level_connection_affinity_factor_first=predecessor_level_connection_affinity_factor_first,
    predecessor_level_connection_affinity_factor_first_rounding_rule='ceil',
    predecessor_level_connection_affinity_factor_main=predecessor_level_connection_affinity_factor_main,
    predecessor_level_connection_affinity_factor_main_rounding_rule='ceil',
    predecessor_level_connection_affinity_factor_decay_main=zero_7_exp_decay,
    seed=8675309,
    max_consecutive_lateral_connections=max_consecutive_lateral_connections,
    gate_after_n_lateral_connections=3,
    gate_activation_function=simple_sigmoid,
    p_lateral_connection=p_lateral_connection,
    p_lateral_connection_decay=zero_95_exp_decay,
    num_lateral_connection_tries_per_unit=num_lateral_connection_tries_per_unit,
    learning_rate=learning_rate,
    loss=tf.keras.losses.CategoricalCrossentropy(),
    metrics=[tf.keras.metrics.TopKCategoricalAccuracy(
                k=1, name='top_1_categorical_accuracy')
             ],
    epochs=epochs,
    # The project name combines the timestamped PROJECT_NAME and the meta trial number.
    project_name=f"{PROJECT_NAME}_meta_{meta_trial_number}",
    model_graphs='model_graphs',
    batch_size=batch_size,
    meta_trial_number=meta_trial_number,
    base_models=[base_mod])

In [None]:
%%time
# Run the search; the returned value is reported in the next cell as the best accuracy.
result = cerebros_automl.run_random_search()

SimpleCerebrosRandomSearch.input_shapes: [(32, 32, 3)]
nan
>nnf>ceil
k is: 0 value is: [{'1': <class 'cerebros.units.units.InputUnit'>}]
0
k is: 1 value is: [{'3': <class 'cerebros.units.units.DenseUnit'>}]
1
Trying to create level 1
We think level 1's predecessors are: [0]
k is: 2 value is: [{'2': <class 'cerebros.units.units.DenseUnit'>}, {'2': <class 'cerebros.units.units.DenseUnit'>}, {'4': <class 'cerebros.units.units.DenseUnit'>}]
2
Trying to create level 2
We think level 2's predecessors are: [0, 1]
k is: 3 value is: [{'4': <class 'cerebros.units.units.DenseUnit'>}, {'5': <class 'cerebros.units.units.DenseUnit'>}]
3
Trying to create level 3
We think level 3's predecessors are: [0, 1, 2]
k is: 4 value is: [{'1': <class 'cerebros.units.units.DenseUnit'>}, {'2': <class 'cerebros.units.units.DenseUnit'>}, {'1': <class 'cerebros.units.units.DenseUnit'>}, {'4': <class 'cerebros.units.units.DenseUnit'>}]
4
Trying to create level 4
We think level 4's predecessors are: [0, 1, 2, 3]
k is:

In [None]:
# Report the value returned by the search, followed by the name of the metric.
print(f'Best accuracy achieved is {result}',
      'top-1 categorical accuracy',
      sep='\n')

Best accuracy achieved is 0.906000018119812
top-1 categorical accuracy


Evaluating the best model found

In [None]:
# Path of a saved model from an earlier run, kept for reference.
# NOTE(review): `best` is not used by the cells shown here; the model is loaded
# from cerebros_automl.best_model_path instead.
best = '2023_11_03_17_05_cerebros_auto_ml_test_cifar10_regnet_meta_42/models/tr_0000000000000002_subtrial_0000000000000001.keras'

In [None]:
# Reload the best model from the path recorded by the search
# (alternative left by the author: cerebros_automl.get_best_model()).
# safe_mode=False switches off Keras' safe deserialization mode — only use it
# with model files you trust.
best_model_found = tf.keras.models.load_model(
    cerebros_automl.best_model_path,
    safe_mode=False,
)

In [None]:
#
eval_loss = tf.keras.losses.CategoricalCrossentropy()
#
eval_metrics =\
[tf.keras.metrics.TopKCategoricalAccuracy(k=1,\
            name='eval_top_1_categorical_accuracy'),
 tf.keras.metrics.TopKCategoricalAccuracy(k=5,\
            name='eval_top_5_categorical_accuracy')
]

In [None]:
# Re-compile with the evaluation loss and metrics (no optimizer is passed),
# then print the architecture of the loaded model.
best_model_found.compile(loss=eval_loss, metrics=eval_metrics)
best_model_found.summary()

Model: "NeuralNetworkFuture_0000000000000nan_tr_2_nn_materialized"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 NeuralNetworkFuture_000000  [(None, 32, 32, 3)]          0         []                            
 0000000nan_tr_2_InputLevel                                                                       
 _0000000000000000_tr_2_Inp                                                                       
 utUnit_0000000000000000_tr                                                                       
 _2_0_inp (InputLayer)                                                                            
                                                                                                  
 model_5 (Functional)        (None, 912)                  8320640   ['NeuralNetworkFuture_00000000
                                          

In [None]:
# One-hot test labels over the 10 classes.
y_test_cat = to_categorical(y_test, 10)
print("Evaluating best model found ...",
      "Loss | Top-1 accuracy | Top-5 accuracy",
      sep="\n")
# Last expression of the cell: [loss, top-1, top-5] is shown as its output.
best_model_found.evaluate(X_test, y_test_cat)

Evaluating best model found ...
Loss | Top-1 accuracy | Top-5 accuracy


[0.6763510704040527, 0.8906999826431274, 0.9927999973297119]

Now train the best model on the entire train dataset

In [None]:
# One-hot labels for the complete training set (10 classes).
y_train_cat = to_categorical(y_train, 10)

# Adam with learning rate 0.0005, categorical cross-entropy, top-1/top-5 accuracy.
optimizer = Adam(learning_rate=0.0005)
loss = tf.keras.losses.CategoricalCrossentropy()
metrics = [
    tf.keras.metrics.TopKCategoricalAccuracy(
        k=1, name='eval_top_1_categorical_accuracy'),
    tf.keras.metrics.TopKCategoricalAccuracy(
        k=5, name='eval_top_5_categorical_accuracy'),
]

# Re-compile the loaded model for further training; run_eagerly=True is kept
# from the original configuration.
best_model_found.compile(
    optimizer=optimizer,
    loss=loss,
    metrics=metrics,
    run_eagerly=True,
)

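Below we use k-fold cross-validation splits. Note that the same model keeps training from one fold to the next, so the per-fold validation scores serve only as a rough progress check.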

In [None]:
from sklearn.model_selection import KFold

n_splits = 3
kf = KFold(n_splits=n_splits)

# NOTE: the same model instance keeps training from fold to fold (its weights
# are never reset), so from the second fold on the validation part has already
# been trained on in an earlier iteration. Treat the fold scores as a progress
# check, not as independent cross-validation estimates.
fold_histories = []  # History object returned by fit(), one per fold
fold_scores = []     # evaluate() result ([loss, top-1, top-5]), one per fold

for train_index, val_index in kf.split(X_train):

    X_t, X_v = X_train[train_index], X_train[val_index]
    y_t, y_v = y_train_cat[train_index], y_train_cat[val_index]

    history = best_model_found.fit(X_t, y_t)
    # Named fold_score rather than `eval`, which would shadow the built-in.
    fold_score = best_model_found.evaluate(X_v, y_v)

    fold_histories.append(history)
    fold_scores.append(fold_score)



Evaluating again

In [None]:
# Re-score the further-trained model on the one-hot encoded test set.
y_test_cat = to_categorical(y_test, 10)
for header_line in ("Evaluating best model found ...",
                    "Loss | Top-1 accuracy | Top-5 accuracy"):
    print(header_line)
# Last expression of the cell: [loss, top-1, top-5] is shown as its output.
best_model_found.evaluate(X_test, y_test_cat)

Evaluating best model found ...
Loss | Top-1 accuracy | Top-5 accuracy


[0.22819805145263672, 0.9247000217437744, 0.9983000159263611]