In [1]:
#pip install tensorflow-addons

I want to tune the following hyperparameters:


- learning rate
- optimizer (SGD or Adam)
- temperature

# Set up the hyperparameter search space (TensorBoard HParams)

In [3]:
import tensorflow as tf
from tensorboard.plugins.hparams import api as hp

In [4]:
# Hyperparameter domains shown in the TensorBoard HParams dashboard.
# They are kept in sync with the values sampled later in this notebook:
#   - temperature   ~ uniform(0.01, 0.2)
#   - learning_rate = 10 ** uniform(-3, -0.3), i.e. roughly [0.001, 0.5]
#   - optimizer     in {"adam", "SGD"} (the exact strings used in the configs)
HP_temperature = hp.HParam('temperature', hp.RealInterval(0., 0.2))
HP_learning_rate = hp.HParam('learning_rate', hp.RealInterval(1e-3, 10 ** -0.3))
HP_OPTIMIZER = hp.HParam('optimizer', hp.Discrete(['adam', 'SGD']))

# Tag under which each trial logs its final test accuracy.
METRIC_ACCURACY = 'accuracy'

# Write the experiment-level configuration to the same root directory that the
# individual trials log to ('logs_resnet/hparam_tuning/run-N'), so that a
# single TensorBoard logdir contains both the config and the runs.
with tf.summary.create_file_writer('logs_resnet/hparam_tuning').as_default():
    hp.hparams_config(
        hparams=[HP_temperature, HP_learning_rate, HP_OPTIMIZER],
        metrics=[hp.Metric(METRIC_ACCURACY, display_name='Accuracy')],
    )




# Define the network and its training procedure

In [5]:
import tensorflow as tf
import tensorflow_addons as tfa
import numpy as np
from tensorflow import keras
from tensorflow.keras import layers


In [6]:
# --- Dataset description -----------------------------------------------------
num_classes = 10
input_shape = (32, 32, 3)

# --- Training constants ------------------------------------------------------
# learning_rate and temperature are not fixed here: they are supplied per
# trial by the hyperparameter search (previous fixed values: 0.001 and 0.05).
AUTO = tf.data.AUTOTUNE
batch_size = 265
hidden_units = 512
projection_units = 128
num_epochs = 50
dropout_rate = 0.5

# --- CIFAR-10 train / test splits --------------------------------------------
train_split, test_split = keras.datasets.cifar10.load_data()
x_train, y_train = train_split
x_test, y_test = test_split

# Report the array shapes of both splits.
print(f"x_train shape: {x_train.shape} - y_train shape: {y_train.shape}")
print(f"x_test shape: {x_test.shape} - y_test shape: {y_test.shape}")

# Batched, shuffled and prefetched tf.data view of the test split.
test_dataset = tf.data.Dataset.from_tensor_slices((x_test, y_test))
test_dataset = test_dataset.shuffle(1024)
test_dataset = test_dataset.batch(batch_size)
test_dataset = test_dataset.prefetch(AUTO)




x_train shape: (50000, 32, 32, 3) - y_train shape: (50000, 1)
x_test shape: (10000, 32, 32, 3) - y_test shape: (10000, 1)


In [7]:
# Preprocessing / augmentation pipeline applied inside the encoder:
# feature normalization followed by random flip, rotation, width and height
# changes.
augmentation_layers = [
    layers.Normalization(),
    layers.RandomFlip("horizontal"),
    layers.RandomRotation(0.02),
    layers.RandomWidth(0.2),
    layers.RandomHeight(0.2),
]
data_augmentation = keras.Sequential(augmentation_layers)

# The normalization layer (first in the pipeline) must learn its statistics
# from the training images before the pipeline is used.
normalization_layer = data_augmentation.layers[0]
normalization_layer.adapt(x_train)


In [8]:
def create_encoder(backbone=None):
    """Build the image encoder: augmentation pipeline followed by a backbone.

    Args:
        backbone: Keras model mapping an image batch to a feature batch.
            When omitted, a fresh, randomly initialised ``ResNet50V2``
            (no top, average pooling) is created for this call.

    Returns:
        A ``keras.Model`` named ``"cifar10-encoder"`` taking inputs of shape
        ``input_shape`` and returning the backbone's output.
    """
    # Build the default backbone lazily. Using a model instance as the default
    # argument would construct it once at function-definition time and share
    # the same weights between every encoder created without an explicit
    # backbone.
    if backbone is None:
        backbone = keras.applications.ResNet50V2(
            include_top=False, weights=None, input_shape=input_shape, pooling="avg"
        )

    inputs = keras.Input(shape=input_shape)
    augmented = data_augmentation(inputs)
    outputs = backbone(augmented)
    model = keras.Model(inputs=inputs, outputs=outputs, name="cifar10-encoder")
    return model


def create_classifier(encoder, trainable=True, learning_rate=0.001):
    """Stack a dense softmax classification head on top of ``encoder``.

    Args:
        encoder: Keras model producing feature vectors from images.
        trainable: Value assigned to ``layer.trainable`` for every layer of
            ``encoder`` (the encoder passed in is modified in place). Use
            ``False`` to train only the classification head.
        learning_rate: Learning rate of the Adam optimizer the classifier is
            compiled with. Defaults to the previously hard-coded ``0.001``.

    Returns:
        A compiled ``keras.Model`` named ``"cifar10-classifier"`` that outputs
        class probabilities over ``num_classes`` classes and tracks sparse
        categorical accuracy.
    """
    for layer in encoder.layers:
        layer.trainable = trainable

    inputs = keras.Input(shape=input_shape)
    features = encoder(inputs)
    features = layers.Dropout(dropout_rate)(features)
    features = layers.Dense(hidden_units, activation="relu")(features)
    features = layers.Dropout(dropout_rate)(features)
    outputs = layers.Dense(num_classes, activation="softmax")(features)

    model = keras.Model(inputs=inputs, outputs=outputs, name="cifar10-classifier")
    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate),
        loss=keras.losses.SparseCategoricalCrossentropy(),
        metrics=[keras.metrics.SparseCategoricalAccuracy()],
    )
    return model

class SupervisedContrastiveLoss(keras.losses.Loss):
    """Supervised contrastive loss on L2-normalized feature vectors.

    Pairwise similarities between all feature vectors of a batch are divided
    by ``temperature`` and passed, together with the labels, to
    ``tfa.losses.npairs_loss``.

    Args:
        temperature: Divisor applied to the similarity matrix.
        name: Optional name forwarded to ``keras.losses.Loss``.
    """

    def __init__(self, temperature=1, name=None):
        super().__init__(name=name)
        self.temperature = temperature

    def __call__(self, labels, feature_vectors, sample_weight=None):
        """Compute the loss for one batch.

        Args:
            labels: Integer class labels, one per sample; both ``(batch,)``
                and ``(batch, 1)`` layouts are accepted.
            feature_vectors: 2-D tensor with one feature vector per row.
            sample_weight: Accepted for API compatibility; not used.

        Returns:
            The value returned by ``tfa.losses.npairs_loss``.
        """
        # Normalize feature vectors
        feature_vectors_normalized = tf.math.l2_normalize(feature_vectors, axis=1)
        # Compute logits
        logits = tf.divide(
            tf.matmul(
                feature_vectors_normalized, tf.transpose(feature_vectors_normalized)
            ),
            self.temperature,
        )
        # Flatten explicitly instead of tf.squeeze(labels): squeezing every
        # unit dimension would turn a batch holding a single sample into a
        # scalar instead of a length-1 vector.
        flat_labels = tf.reshape(labels, [-1])
        return tfa.losses.npairs_loss(flat_labels, logits)


def add_projection_head(encoder):
    """Wrap ``encoder`` with a ReLU dense projection layer.

    Args:
        encoder: Keras model producing feature vectors from images.

    Returns:
        A ``keras.Model`` named ``"cifar-encoder_with_projection-head"`` whose
        output has ``projection_units`` units.
    """
    image_input = keras.Input(shape=input_shape)
    encoded = encoder(image_input)
    projection_layer = layers.Dense(projection_units, activation="relu")
    projected = projection_layer(encoded)
    return keras.Model(
        inputs=image_input,
        outputs=projected,
        name="cifar-encoder_with_projection-head",
    )



def apply_resnet_block(x,downsample,conv_by_block):
    """Append one residual block to the tensor ``x``.

    Args:
        x: Input feature map; its last dimension is the channel count.
        downsample: When true, the block doubles the channel count and uses
            stride 2, and the skip path goes through a strided 1x1
            convolution. Otherwise the skip path is the identity.
        conv_by_block: Total number of 3x3 convolutions in the block (first
            and last included).

    Returns:
        The output tensor of the block.
    """
    in_channels = x.shape[-1]

    if downsample:
        depth = in_channels * 2
        stride = (2, 2)
        # Linear projection so the skip path matches the new shape.
        shortcut = layers.Conv2D(depth, 1, strides=stride, activation=None)(x)
    else:
        depth = in_channels
        stride = (1, 1)
        shortcut = x

    # First convolution of the block (carries the stride, if any).
    x = layers.Conv2D(depth, 3, strides=stride, activation='relu', padding="same")(x)
    x = layers.BatchNormalization()(x)

    # Intermediate convolutions: everything except the first and the last.
    for _ in range(conv_by_block - 2):
        x = layers.Conv2D(depth, 3, activation='relu', padding="same")(x)
        x = layers.BatchNormalization()(x)

    # Last convolution has no activation; ReLU is applied after the addition.
    x = layers.Conv2D(depth, 3, padding="same")(x)

    x = shortcut + x
    x = layers.ReLU()(x)
    x = layers.BatchNormalization()(x)
    return x

def get_resnet_backbone(input_shape,hparams):
    """Build a small configurable ResNet-style backbone.

    Args:
        input_shape: Shape of one input image (without the batch dimension).
        hparams: Dictionary with the keys
            ``"depth_first_convolution"`` (filters of the 7x7 stem convolution),
            ``"number_of_block"`` (number of residual blocks),
            ``"downsample_num"`` (1-based indices of the blocks that downsample),
            ``"conv_by_block"`` (convolutions per block, at least 2),
            ``"output_dim"`` (filters of the final convolution) and
            ``"globalPoolingType"`` (``"Mean"`` applies global average
            pooling; any other value leaves the feature map unpooled).

    Returns:
        A ``tf.keras.Model`` mapping images to features.
    """
    assert hparams["conv_by_block"]>=2, "conv_by_block must be at least 2"

    # Use the shape supplied by the caller instead of a hard-coded (32,32,3).
    inputs=layers.Input(input_shape)

    # Stem: strided 7x7 convolution.
    x=layers.Conv2D(hparams["depth_first_convolution"],7,strides=(2,2),activation='relu',padding="same")(inputs)
    x=layers.BatchNormalization()(x)

    # Residual blocks are numbered from 1; those listed in "downsample_num"
    # halve the resolution and double the depth.
    for block in range(1,hparams["number_of_block"]+1):
        x=apply_resnet_block(x,block in hparams["downsample_num"],hparams["conv_by_block"])

    x=layers.Conv2D(hparams["output_dim"],3, activation='relu',padding="same")(x)
    x=layers.BatchNormalization()(x)
    if hparams["globalPoolingType"]=="Mean":
        x=layers.GlobalAveragePooling2D()(x)

    return tf.keras.Model(inputs,x)


## model definition with the api

In [13]:
# Number of random hyperparameter configurations to draw.
number_of_training=10
# Learning rates are sampled log-uniformly: the exponent is drawn uniformly
# between -3 and -0.3, giving values from 0.001 up to about 0.5.
# NOTE(review): no random seed is set in the visible cells, so the sampled
# configurations differ between kernel runs.
learning_rate=10**np.random.uniform(-0.3,-3,number_of_training)
# Contrastive-loss temperatures, uniform between 0.01 and 0.2.
temperature=np.random.uniform(0.01,0.2,number_of_training)
# Optimizer name for each configuration, picked among "adam" and "SGD".
optimizer=np.random.choice(["adam","SGD"],number_of_training)


In [14]:
# One hyperparameter dictionary per trial, keyed by the trial index.
config_to_test = {}
for trial_index in range(number_of_training):
    config_to_test[trial_index] = {
        "temperature": temperature[trial_index],
        "optimizer": optimizer[trial_index],
        "learning_rate": learning_rate[trial_index],
    }

# Show every configuration that is going to be evaluated.
for trial_index in config_to_test:
    print(trial_index, config_to_test[trial_index])

0 {'temperature': 0.07167965229184195, 'optimizer': 'SGD', 'learning_rate': 0.49778680580027007}
1 {'temperature': 0.143440051763222, 'optimizer': 'adam', 'learning_rate': 0.03191585744792269}
2 {'temperature': 0.19814413672203127, 'optimizer': 'adam', 'learning_rate': 0.016089648642657405}
3 {'temperature': 0.18783177697486292, 'optimizer': 'adam', 'learning_rate': 0.47137449329034486}
4 {'temperature': 0.01632473454774216, 'optimizer': 'adam', 'learning_rate': 0.0012719336272152611}
5 {'temperature': 0.07595381732611305, 'optimizer': 'SGD', 'learning_rate': 0.003135672683222365}
6 {'temperature': 0.12355834498251328, 'optimizer': 'SGD', 'learning_rate': 0.15799313996496078}
7 {'temperature': 0.014626659456859606, 'optimizer': 'adam', 'learning_rate': 0.0013602300641906559}
8 {'temperature': 0.01960442658292879, 'optimizer': 'SGD', 'learning_rate': 0.4194842499443589}
9 {'temperature': 0.14383485948403216, 'optimizer': 'adam', 'learning_rate': 0.1557032048305991}


In [15]:
# Sanity check: log10(0.5) is about -0.3, the upper exponent used when
# sampling the learning rate, so the largest learning rate is about 0.5.
np.log10(0.5)

-0.3010299956639812

In [16]:
# Architecture of the custom ResNet backbone used for every trial.
hparams_resnet = dict(
    depth_first_convolution=64,  # filters of the stem convolution
    output_dim=2048,             # filters of the final convolution
    number_of_block=5,           # residual blocks
    downsample_num=[4, 5],       # blocks (1-based) that downsample
    conv_by_block=2,             # convolutions per residual block
    globalPoolingType="Mean",    # global average pooling at the end
)

# Epochs of contrastive pre-training per trial (replaces the earlier value).
num_epochs = 1

def train_test_model(hparams):
    """Train and evaluate one hyperparameter configuration.

    Training happens in two stages:
      1. the encoder plus a projection head is trained with the supervised
         contrastive loss, using the optimizer, learning rate and temperature
         from ``hparams``;
      2. a classifier head is trained on top of the frozen encoder.

    Args:
        hparams: Dictionary with the keys ``"learning_rate"``,
            ``"temperature"`` and ``"optimizer"``. The value ``"adam"``
            selects Adam; any other value selects SGD.

    Returns:
        The sparse categorical accuracy of the classifier on the test set.
    """
    learning_rate = hparams["learning_rate"]
    temperature = hparams["temperature"]
    if hparams["optimizer"] == "adam":
        optimizer = keras.optimizers.Adam(learning_rate)
    else:
        optimizer = keras.optimizers.SGD(learning_rate)

    # First step: contrastive pre-training of the encoder.
    early_stop_unsup = tf.keras.callbacks.EarlyStopping(
        monitor="loss", patience=2, restore_best_weights=True
    )

    # The backbone is built exactly once per trial.
    resnet = get_resnet_backbone(input_shape, hparams_resnet)
    encoder = create_encoder(resnet)

    encoder_with_projection_head = add_projection_head(encoder)
    encoder_with_projection_head.compile(
        optimizer=optimizer,
        loss=SupervisedContrastiveLoss(temperature),
    )
    encoder_with_projection_head.fit(
        x=x_train,
        y=y_train,
        batch_size=batch_size,
        epochs=num_epochs,
        callbacks=[early_stop_unsup],
        verbose=2,
    )

    # Second step: classifier trained on top of the frozen encoder.
    early_stop = tf.keras.callbacks.EarlyStopping(
        monitor="sparse_categorical_accuracy", patience=2, restore_best_weights=True
    )

    classifier = create_classifier(encoder, trainable=False)
    classifier.fit(
        x=x_train, y=y_train, batch_size=batch_size, epochs=10, callbacks=[early_stop]
    )

    # evaluate() returns [loss, accuracy]; only the accuracy is reported.
    accuracy = classifier.evaluate(x_test, y_test)[1]

    return accuracy


def run(run_dir, hparams):
    """Run one trial and log its hyperparameters and accuracy.

    Args:
        run_dir: Directory the summary writer of this trial writes to.
        hparams: Hyperparameter dictionary of the trial; it is recorded with
            ``hp.hparams`` and passed to ``train_test_model``.
    """
    writer = tf.summary.create_file_writer(run_dir)
    with writer.as_default():
        # Record the values used in this trial before training starts.
        hp.hparams(hparams)
        test_accuracy = train_test_model(hparams)
        tf.summary.scalar("accuracy", test_accuracy, step=1)



   

 

# Evaluate every sampled configuration, one TensorBoard run per trial.
for session_num in config_to_test:
    hparams = config_to_test[session_num]
    run_name = "run-%d" % session_num
    print('--- Starting trial: %s' % run_name)
    print(hparams)
    run_dir = 'logs_resnet/hparam_tuning/' + run_name
    run(run_dir, hparams)
    

    

--- Starting trial: run-0
{'temperature': 0.07167965229184195, 'optimizer': 'SGD', 'learning_rate': 0.49778680580027007}
189/189 - 35s - loss: 6.0399 - 35s/epoch - 184ms/step
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
--- Starting trial: run-1
{'temperature': 0.143440051763222, 'optimizer': 'adam', 'learning_rate': 0.03191585744792269}
189/189 - 36s - loss: 5.4869 - 36s/epoch - 189ms/step
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
--- Starting trial: run-2
{'temperature': 0.19814413672203127, 'optimizer': 'adam', 'learning_rate': 0.016089648642657405}
189/189 - 35s - loss: 5.4631 - 35s/epoch - 188ms/step
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
--- Starting trial: run-3
{'temperature': 0.18783177697486292, 'optimizer': 'adam', 'learning_rate': 0.47137449329034486}
189/189 - 37s - los

189/189 - 38s - loss: 5.5081 - 38s/epoch - 198ms/step
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
--- Starting trial: run-7
{'temperature': 0.014626659456859606, 'optimizer': 'adam', 'learning_rate': 0.0013602300641906559}
189/189 - 57s - loss: 10.3248 - 57s/epoch - 300ms/step
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
--- Starting trial: run-8
{'temperature': 0.01960442658292879, 'optimizer': 'SGD', 'learning_rate': 0.4194842499443589}
189/189 - 38s - loss: 7.3366 - 38s/epoch - 203ms/step
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
--- Starting trial: run-9
{'temperature': 0.14383485948403216, 'optimizer': 'adam', 'learning_rate': 0.1557032048305991}
189/189 - 40s - loss: 5.5372 - 40s/epoch - 212ms/step
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
