<a href="https://colab.research.google.com/github/poojasaxena/tensorflow-developer-zertificate-coursera/blob/main/course2_convolutional-neural-networks-tensorflow/05_tuning_hyperparameters/Course_2_Part_10_Lesson_2_Notebook_KerasTuner_cifar10.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
%autosave 120
%matplotlib inline

Autosaving every 120 seconds





# Step 1. Basic Setup

In [3]:
from platform import python_version
print(python_version())

3.7.10


In [4]:
import tensorflow as tf
from tensorflow import keras
import matplotlib.pyplot as plt
print(tf.__version__)

2.4.1


## Step 2.  KerasTuner Class

In [5]:
!pip install -q -U keras-tuner

[?25l[K     |█████▏                          | 10kB 18.7MB/s eta 0:00:01[K     |██████████▍                     | 20kB 22.7MB/s eta 0:00:01[K     |███████████████▋                | 30kB 18.3MB/s eta 0:00:01[K     |████████████████████▉           | 40kB 16.4MB/s eta 0:00:01[K     |██████████████████████████      | 51kB 12.3MB/s eta 0:00:01[K     |███████████████████████████████▎| 61kB 11.5MB/s eta 0:00:01[K     |████████████████████████████████| 71kB 5.6MB/s 
[?25h  Building wheel for keras-tuner (setup.py) ... [?25l[?25hdone
  Building wheel for terminaltables (setup.py) ... [?25l[?25hdone


In [6]:
import kerastuner as kt

In [7]:
from kerastuner import HyperModel

class CustomHyperModel(HyperModel):

    def __init__(self, L1=0):
        self.L1=L1
        
    def build(self, hp):
        INPUT_SHAPE= (32, 32, 3)
        
        model = keras.Sequential()        
        for i in range(hp.Int('conv_blocks', 3, 5, default=3)):
            filters = hp.Int('filters_' + str(i), 32, 256, step=32)
            
            for _ in range(2):
                model.add(keras.layers.Conv2D(filters, kernel_size=(3,3), padding='same', input_shape=INPUT_SHAPE))
                model.add(keras.layers.BatchNormalization())
                model.add(keras.layers.ReLU())
            
            if hp.Choice('pooling' + str(i), ['avg', 'max']) == 'max' :
                model.add(keras.layers.MaxPooling2D())
            else:
                model.add(keras.layers.AvgPool2D())
        
        model.add(keras.layers.GlobalAvgPool2D())
        model.add(keras.layers.Dense(hp.Int('hidden_size', 30, 100, step=10, default=50), activation='relu',kernel_regularizer=tf.keras.regularizers.L1(l1=self.L1)))
        model.add(keras.layers.Dropout(hp.Float('dropout', 0, 0.5, step=0.1, default=0.5)))
        model.add(keras.layers.Dense(10, activation='softmax'))

        
        model.compile(optimizer=keras.optimizers.Adam(learning_rate=hp.Float('learning_rate', 1e-4, 1e-2, sampling='log')), 
                  loss='sparse_categorical_crossentropy', 
                  metrics=['accuracy'])
        print(model.summary())
        return model

### categorical_crossentropy vs sparse_categorical_crossentropy
1. categorical_crossentropy (cce) produces a one-hot array containing the probable match for each category,
2. sparse_categorical_crossentropy (scce) produces a category index of the most likely matching category.

## Step 2.1. Instantiate the tuner

In [8]:
import kerastuner as kt

simple_hypermodel = CustomHyperModel()
tuner = kt.Hyperband(simple_hypermodel,
                    objective='val_accuracy', 
                    max_epochs=30,
                    hyperband_iterations=2)

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 32, 32, 32)        896       
_________________________________________________________________
batch_normalization (BatchNo (None, 32, 32, 32)        128       
_________________________________________________________________
re_lu (ReLU)                 (None, 32, 32, 32)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 32, 32, 32)        9248      
_________________________________________________________________
batch_normalization_1 (Batch (None, 32, 32, 32)        128       
_________________________________________________________________
re_lu_1 (ReLU)               (None, 32, 32, 32)        0         
_________________________________________________________________
average_pooling2d (AveragePo (None, 16, 16, 32)        0

## Step 3. Dataset Preparation

### 3.1 tfds dataset preparation

In [None]:
import tensorflow_datasets as tfds

data = tfds.load("cifar10")
train_ds, test_ds = data["train"], data["test"]


In [None]:
def standardize_record(record):
    return tf.cast(record["image"], tf.float32) / 255.0, record["label"]


train_ds = train_ds.map(standardize_record).cache().batch(64).shuffle(10000)
test_ds = test_ds.map(standardize_record).cache().batch(64)

### 3.2 keras.datasets

In [9]:
(img_train, label_train), (img_test, label_test) = keras.datasets.cifar10.load_data()

Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz


In [10]:
## lets check the number of train/test images
print('No of train images: ', img_train.shape)
print('No of test images: ', img_test.shape)
print('label format for trainig:', label_train.shape)
print('label format for testing:', label_test.shape)
print('unique lables are :', np.unique(label_train))

No of train images:  (50000, 32, 32, 3)
No of test images:  (10000, 32, 32, 3)
label format for trainig: (50000, 1)
label format for testing: (10000, 1)


NameError: ignored

In [11]:
## normalize dataset
img_train = img_train.astype('float32')/255.0
img_test = img_test.astype('float32')/255.0

# Step 4. Hyperparameter Search
* each model will train for at most 30 epochs and 2 iteratios of the Hyperband algo will be run.

In [None]:
tuner.search(img_train, label_train, epochs=12, validation_split=0.2)
# tuner.search(train_ds, validation_data=test_ds, callbacks=[tf.keras.callbacks.EarlyStopping(patience=1)])

## get the optimal hyperparameters
best_hyperparameters=tuner.get_best_hyperparameters(num_trials=1)[0]

print(f"""
The Optimal number of units in the first dense layeer is {best_hyperparameters.get('hidden_size')} and 
the optimal learning rate for the optimal learning_rate is {best_hyperparameters.get('learning_rate')}""")

Trial 71 Complete [00h 06m 01s]
val_accuracy: 0.7610999941825867

Best val_accuracy So Far: 0.8345999717712402
Total elapsed time: 03h 45m 44s

Search: Running Trial #72

Hyperparameter    |Value             |Best Value So Far 
conv_blocks       |5                 |4                 
filters_0         |128               |128               
pooling0          |avg               |avg               
filters_1         |96                |192               
pooling1          |avg               |avg               
filters_2         |224               |128               
pooling2          |avg               |avg               
hidden_size       |100               |70                
dropout           |0.1               |0.2               
learning_rate     |0.0015373         |0.00017981        
filters_3         |224               |192               
pooling3          |max               |avg               
filters_4         |64                |96                
pooling4          |max         

In [None]:
# Build the model with the optimal hyperparamers and train it on the data for 20 epochs
model=tuner.hypermodel.build(best_hyperparameters)
history=model.fit(img_train, label_train, epochs=10, validation_split=0.2)

val_acc_per_epoch = history.history['val_accuracy']
best_epoch = val_acc_per_epoch.index(max(val_acc_per_epoch)) +1
print('Best epoch: %d' % (best_epoch,))