In [1]:
import numpy as np
import tensorflow as tf
import os

from keras.utils import np_utils
import cv2


Using TensorFlow backend.


In [2]:

(train_features, train_labels), (test_features, test_labels) = tf.keras.datasets.cifar10.load_data()
num_train, img_channels, img_rows, img_cols =  train_features.shape
num_test, _, _, _ =  test_features.shape
num_classes = len(np.unique(train_labels))

Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz


In [0]:
train_features = train_features.astype('float32')/255
test_features = test_features.astype('float32')/255
# convert class labels to binary class labels
train_labels = np_utils.to_categorical(train_labels, num_classes)
test_labels = np_utils.to_categorical(test_labels, num_classes)

In [0]:
def get_cutout_eraser_and_random_crop(p=0.5,s_l=0.05,s_h=0.3,r_1=0.3,r_2=1/0.3,max_erasers_per_image=1,pixel_level=True,random_crop_size=(32,32),padding_pixels=4):
  
  assert max_erasers_per_image>=1 
  def eraser(input_img):
        v_l = np.min(input_img)
        v_h = np.max(input_img)
        img_h, img_w, img_c = input_img.shape
        p_1 = np.random.rand()

        if p_1 > p:
            return input_img
        mx = np.random.randint(1,max_erasers_per_image+1)
        for i in range(mx):
          while True:
              s = np.random.uniform(s_l, s_h) * img_h * img_w
              r = np.random.uniform(r_1, r_2)
              w = int(np.sqrt(s / r))
              h = int(np.sqrt(s * r))
              left = np.random.randint(0, img_w)
              top = np.random.randint(0, img_h)

              if left + w <= img_w and top + h <= img_h:
                  break

          if pixel_level:
              c = np.random.uniform(v_l, v_h, (h, w, img_c))
          else:
              c = np.random.uniform(v_l, v_h)

          input_img[top:top + h, left:left + w, :] = c
        return input_img

    
  def random_crop(input_image):
    assert input_image.shape[2]==3

    #pad for 4 pixels
    img = cv2.copyMakeBorder(input_image,padding_pixels,padding_pixels,padding_pixels,padding_pixels,cv2.BORDER_REPLICATE)
    height , width =img.shape[0],img.shape[1]
    dy,dx = random_crop_size
    x = np.random.randint(0,width - dx + 1)
    y = np.random.randint(0,height - dy + 1)
    return img[y:(y+dy),x:(x+dx),:]

  def preprocess_image(input_image):
    return eraser(random_crop(input_image))
  
  return preprocess_image
      

In [0]:
datagen = tf.keras.preprocessing.image.ImageDataGenerator(horizontal_flip=0.5,featurewise_center=True, featurewise_std_normalization=True,preprocessing_function=get_cutout_eraser_and_random_crop())
datagen.mean = np.array([0.4914, 0.4822, 0.4465], dtype=np.float32).reshape((1,1,3)) # ordering: [R, G, B]
datagen.std = np.array([0.2023, 0.1994, 0.2010], dtype=np.float32).reshape((1,1,3)) # ordering: [R, G, B]
#datagen.fit(train_features)
train_generator = datagen.flow(train_features,train_labels,batch_size=128)

In [0]:
test_datagen = tf.keras.preprocessing.image.ImageDataGenerator(featurewise_center=True, featurewise_std_normalization=True)
test_datagen.fit(test_features)
test_generator = test_datagen.flow(test_features,test_labels,batch_size=128)

#Resnet Model Creation

In [0]:
def ResNetBlock(input_layer, channels,stride=1):
  
  bn_1 = tf.keras.layers.BatchNormalization(momentum=0.9,epsilon=1e-5)(input_layer)
  activation_layer_b1 = tf.keras.layers.Activation('relu')(bn_1)
  block_layer_1 = tf.keras.layers.Conv2D(channels, (3,3), padding='same',strides=stride)(activation_layer_b1)
  
  bn_2 = tf.keras.layers.BatchNormalization(momentum=0.9,epsilon=1e-5)(block_layer_1)
  activation_layer_b2 = tf.keras.layers.Activation('relu')(bn_2) 
  block_layer_2 = tf.keras.layers.Conv2D(channels, (3,3), padding='same')(activation_layer_b2)
   
  
  return block_layer_2
  

In [8]:
# from tf.keras.layers import Input, add, GlobalAveragePooling2D, Dense
#from tf.keras.models import Model

inputs = tf.keras.layers.Input(shape=(32, 32, 3))

x1 = tf.keras.layers.Conv2D(32, (3, 3),padding='same')(inputs)   #32x32 
activation_x1 = tf.keras.layers.Activation('relu')(x1)
bn1 = tf.keras.layers.BatchNormalization(momentum=0.9,epsilon=1e-5)(activation_x1)

# x2 = tf.keras.layers.Conv2D(64, (3, 3),padding='same')(activation_x1)   #32x32 
# activation_x2 = tf.keras.layers.Activation('relu')(x2)

# x3 = tf.keras.layers.Conv2D(64, (3, 3),padding='same')(activation_x2)   #32x32 
# activation_x3 = tf.keras.layers.Activation('relu')(x3)


##block 1

blk1 = ResNetBlock(bn1,32)  ##32x32
z1 = tf.keras.layers.add([blk1,bn1])

blk1_c = ResNetBlock(z1,32)
z1_c = tf.keras.layers.add([blk1_c,z1])

##block 2

blk2 = ResNetBlock(z1_c,64)
one_blk = tf.keras.layers.Conv2D(64, (1, 1), padding='same')(z1_c)
z2 = tf.keras.layers.add([blk2,one_blk])

blk2_c = ResNetBlock(z2,64)
z2_c = tf.keras.layers.add([blk2_c,z2])

##block3

blk3 = ResNetBlock(z2_c,128,stride=2)
one_blk_1 = tf.keras.layers.Conv2D(128, (1, 1), padding='same',strides=2)(z2_c)
z3 = tf.keras.layers.add([blk3,one_blk_1])

blk3_c = ResNetBlock(z3,128)
z3_c = tf.keras.layers.add([blk3_c,z3])

##block4

blk4 = ResNetBlock(z3_c,256,stride=2)
one_blk_2 = tf.keras.layers.Conv2D(256, (1, 1), padding='same',strides=2)(z3_c)
z4 = tf.keras.layers.add([blk4,one_blk_2])

blk4_c = ResNetBlock(z4,256)
z4_c = tf.keras.layers.add([blk4_c,z4])

avg_pool_layer = tf.keras.layers.GlobalAveragePooling2D()(z4_c)

#flatten_layer = Flatten()(avg_pool_layer)

fc_layer = tf.keras.layers.Dense(10, activation='softmax')(avg_pool_layer)


model = tf.keras.models.Model(inputs=inputs, outputs= fc_layer)


W0805 14:09:37.377666 140136801228672 deprecation.py:506] From /usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/init_ops.py:1251: calling VarianceScaling.__init__ (from tensorflow.python.ops.init_ops) with dtype is deprecated and will be removed in a future version.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor


In [9]:
model.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 32, 32, 3)]  0                                            
__________________________________________________________________________________________________
conv2d (Conv2D)                 (None, 32, 32, 32)   896         input_1[0][0]                    
__________________________________________________________________________________________________
activation (Activation)         (None, 32, 32, 32)   0           conv2d[0][0]                     
__________________________________________________________________________________________________
batch_normalization (BatchNorma (None, 32, 32, 32)   128         activation[0][0]                 
______________________________________________________________________________________________

In [10]:
#from one_cycle_lr import LRFinder
from one_cycle_lr_tf import LRFinder
num_samples= train_features.shape[0]
batch_size =512
num_epoch=50
max_lr=0.05


ModuleNotFoundError: ignored

#Best LR would be 0.01

In [0]:
import numpy as np
import tensorflow as tf
EPOCHS=24
LEARNING_RATE=0.01
len_train=50000
BATCH_SIZE=512
MOMENTUM=0.9

batches_per_epoch = len_train//BATCH_SIZE + 1

lr_schedule = lambda t: np.interp([t], [0, (EPOCHS+1)//5, EPOCHS], [0, LEARNING_RATE, 0])[0]
global_step = tf.train.get_or_create_global_step()
lr_func = lambda: lr_schedule(global_step/batches_per_epoch)/BATCH_SIZE
opt = tf.train.MomentumOptimizer(lr_func, momentum=MOMENTUM, use_nesterov=True)

In [0]:
## for 24 epochs only

MAX_LR= 0.1
base_lr = 0.01

def lr_func(epoch,lr):
  lr = base_lr
  max_lr = MAX_LR
  one_step = (max_lr-base_lr)/16
  if(epoch == 0):
    return lr
  elif(epoch>0 and epoch<8):
    lr += (max_lr-base_lr)*(epoch)/7
  else:
    lr = max_lr - (max_lr-base_lr)*(epoch-7)/16 
  #base_lr = lr
  return lr
  

In [0]:
opt = tf.keras.optimizers.SGD(momentum=0.9)
model.compile(optimizer=opt , loss='categorical_crossentropy', metrics=['accuracy'])

In [0]:

# from one_cycle_lr_tf import OneCycleLR

# lr_manager = OneCycleLR(num_samples, num_epoch, batch_size, max_lr,
#                         end_percentage=0.1, scale_percentage=None,
#                         maximum_momentum=0.95, minimum_momentum=0.85)
                        


In [12]:
filepath = "Resnet-13-test1.hdf5"
from tensorflow.keras.callbacks import ModelCheckpoint,LearningRateScheduler

checkpoint = tf.keras.callbacks.ModelCheckpoint(filepath, monitor='val_acc', verbose=1, save_best_only=True, mode='max')


# Train the model
model_info = model.fit_generator(train_generator,
                                 steps_per_epoch=np.ceil(50000/128), epochs=24,  
                                 validation_data = test_generator, verbose=1,callbacks=[LearningRateScheduler(lr_func, verbose=1)])



Epoch 00001: LearningRateScheduler reducing learning rate to 0.01.
Epoch 1/24

Epoch 00002: LearningRateScheduler reducing learning rate to 0.02285714285714286.
Epoch 2/24

Epoch 00003: LearningRateScheduler reducing learning rate to 0.03571428571428572.
Epoch 3/24

Epoch 00004: LearningRateScheduler reducing learning rate to 0.04857142857142858.
Epoch 4/24

Epoch 00005: LearningRateScheduler reducing learning rate to 0.06142857142857144.
Epoch 5/24

Epoch 00006: LearningRateScheduler reducing learning rate to 0.07428571428571429.
Epoch 6/24

Epoch 00007: LearningRateScheduler reducing learning rate to 0.08714285714285715.
Epoch 7/24

Epoch 00008: LearningRateScheduler reducing learning rate to 0.1.
Epoch 8/24

Epoch 00009: LearningRateScheduler reducing learning rate to 0.094375.
Epoch 9/24

Epoch 00010: LearningRateScheduler reducing learning rate to 0.08875000000000001.
Epoch 10/24

Epoch 00011: LearningRateScheduler reducing learning rate to 0.083125.
Epoch 11/24

Epoch 00012: Lea

In [0]:
lr_func