In [0]:
import numpy as np
import tensorflow as tf
import os

from keras.utils import np_utils
import cv2
from keras.layers import Dense, Dropout, Activation, Flatten, Add, BatchNormalization

In [0]:
# Load CIFAR-10 as NumPy arrays (train and test splits).
(train_features, train_labels), (test_features, test_labels) = tf.keras.datasets.cifar10.load_data()
# The image arrays are channels-last, i.e. (samples, rows, cols, channels),
# so the shape must be unpacked in that order.
num_train, img_rows, img_cols, img_channels = train_features.shape
num_test = test_features.shape[0]
# Number of distinct label values present in the training split.
num_classes = len(np.unique(train_labels))

In [0]:
# Cast pixel values to float32 and divide by 255.
# NOTE: this cell rebinds its own inputs, so re-running it without reloading
# the data would scale the features a second time.
train_features, test_features = (
    features.astype('float32') / 255
    for features in (train_features, test_features)
)
# One-hot encode the integer class labels.
train_labels, test_labels = (
    np_utils.to_categorical(labels, num_classes)
    for labels in (train_labels, test_labels)
)

In [0]:
def get_cutout_eraser_and_random_crop(p=0.5,s_l=0.05,s_h=0.3,r_1=0.3,r_2=1/0.3,max_erasers_per_image=1,pixel_level=True,random_crop_size=(32,32),padding_pixels=4):
  """Build a per-image augmentation: padded random crop followed by cutout.

  Args:
    p: Erasing is applied when a uniform draw in [0, 1) is <= p.
    s_l, s_h: Bounds of the uniformly drawn fraction of the image area
      covered by one erased patch.
    r_1, r_2: Bounds of the uniformly drawn height/width ratio of a patch.
    max_erasers_per_image: Upper bound (inclusive) on the number of patches
      erased per image; the actual count is drawn uniformly from 1..max.
    pixel_level: If True each erased pixel gets its own random value,
      otherwise the whole patch is filled with a single random value.
    random_crop_size: (height, width) of the crop taken from the padded image.
    padding_pixels: Border width added on every side before cropping.

  Returns:
    A function mapping an (H, W, 3) image array to its augmented version.
  """
  assert max_erasers_per_image>=1

  def _sample_patch(img_h, img_w):
    # Rejection sampling: redraw until the patch lies fully inside the image.
    while True:
      area = np.random.uniform(s_l, s_h) * img_h * img_w
      aspect = np.random.uniform(r_1, r_2)
      patch_w = int(np.sqrt(area / aspect))
      patch_h = int(np.sqrt(area * aspect))
      left = np.random.randint(0, img_w)
      top = np.random.randint(0, img_h)
      if left + patch_w <= img_w and top + patch_h <= img_h:
        return top, left, patch_h, patch_w

  def eraser(input_img):
    # Fill values are drawn from the image's own value range.
    lowest, highest = np.min(input_img), np.max(input_img)
    img_h, img_w, img_c = input_img.shape

    if np.random.rand() > p:
      return input_img

    patch_count = np.random.randint(1, max_erasers_per_image + 1)
    for _ in range(patch_count):
      top, left, patch_h, patch_w = _sample_patch(img_h, img_w)
      # size=None yields a single scalar that is broadcast over the patch.
      fill_shape = (patch_h, patch_w, img_c) if pixel_level else None
      # The patch is written in place into input_img.
      input_img[top:top + patch_h, left:left + patch_w, :] = np.random.uniform(lowest, highest, fill_shape)
    return input_img

  def random_crop(input_image):
    assert input_image.shape[2]==3

    # Pad every side by replicating the edge pixels, then crop a random window.
    padded = cv2.copyMakeBorder(
        input_image,
        padding_pixels, padding_pixels, padding_pixels, padding_pixels,
        cv2.BORDER_REPLICATE)
    crop_h, crop_w = random_crop_size
    padded_h, padded_w = padded.shape[0], padded.shape[1]
    x0 = np.random.randint(0, padded_w - crop_w + 1)
    y0 = np.random.randint(0, padded_h - crop_h + 1)
    return padded[y0:y0 + crop_h, x0:x0 + crop_w, :]

  def preprocess_image(input_image):
    # Crop first so the erased patch is placed on the final crop.
    return eraser(random_crop(input_image))

  return preprocess_image

In [0]:
# Training pipeline: random horizontal flips, the crop + cutout preprocessing
# function, and per-channel standardisation.
datagen = tf.keras.preprocessing.image.ImageDataGenerator(
    horizontal_flip=True,  # boolean switch, not a probability
    featurewise_center=True,
    featurewise_std_normalization=True,
    preprocessing_function=get_cutout_eraser_and_random_crop(),
)
# The per-channel statistics are assigned directly instead of being estimated
# with datagen.fit(train_features).
datagen.mean = np.array([0.4914, 0.4822, 0.4465], dtype=np.float32).reshape((1,1,3)) # ordering: [R, G, B]
datagen.std = np.array([0.2023, 0.1994, 0.2010], dtype=np.float32).reshape((1,1,3)) # ordering: [R, G, B]
train_generator = datagen.flow(train_features, train_labels, batch_size=128)

In [0]:
# Evaluation pipeline: no augmentation, only per-channel standardisation.
test_datagen = tf.keras.preprocessing.image.ImageDataGenerator(
    featurewise_center=True,
    featurewise_std_normalization=True,
)
# Reuse the statistics applied to the training data rather than fitting new
# ones on the test set, so both splits are normalised identically.
test_datagen.mean = datagen.mean
test_datagen.std = datagen.std
test_generator = test_datagen.flow(test_features, test_labels, batch_size=128)

In [0]:
# import time, math
# def init_pytorch(shape, dtype=tf.float32, partition_info=None):
#   fan = np.prod(shape[:-1])
#   bound = 1 / math.sqrt(fan)
#   return tf.random.uniform(shape, minval=-bound, maxval=bound, dtype=dtype)

# Shared kernel initializer for every Conv2D layer below (unseeded Glorot normal).
initializer = tf.keras.initializers.glorot_normal()

W0807 14:11:22.780110 140594319284096 deprecation.py:506] From /usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/init_ops.py:1288: calling VarianceScaling.__init__ (from tensorflow.python.ops.init_ops) with dtype is deprecated and will be removed in a future version.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor


In [0]:
def WResNetBlock(input_layer,channels,stride=1):
  """Build the residual branch BN -> ReLU -> 3x3 conv -> BN -> ReLU -> 3x3 conv.

  Args:
    input_layer: Input tensor of the block.
    channels: Number of filters of both 3x3 convolutions.
    stride: Stride of the first convolution; the second always uses stride 1.

  Returns:
    A tuple (residual, pre_activation): the output of the second convolution
    and the output of the first BN -> ReLU pair, which callers use as the
    shortcut branch.
  """
  layers = tf.keras.layers

  def bn_relu(tensor):
    normalized = layers.BatchNormalization(momentum=0.9, epsilon=1e-5)(tensor)
    return layers.Activation('relu')(normalized)

  def conv3x3(tensor, conv_stride):
    return layers.Conv2D(
        channels, (3, 3),
        padding='same',
        kernel_initializer=initializer,
        use_bias=False,
        strides=conv_stride)(tensor)

  pre_activation = bn_relu(input_layer)
  # Inner calls run first, so layers are created in the order conv, BN, ReLU, conv.
  residual = conv3x3(bn_relu(conv3x3(pre_activation, stride)), 1)

  return residual, pre_activation

In [0]:
# Assemble the network: a 3x3 stem convolution, nine WResNetBlock residual
# blocks in three groups (96, 192 and 384 filters), then BN -> ReLU -> global
# average pooling -> softmax classifier.
#
# In every block the shortcut branch is the pre-activation tensor returned by
# WResNetBlock (its first BN -> ReLU output). The first block of each group
# changes the channel count, so its shortcut goes through a 1x1 convolution;
# the first blocks of the 192 and 384 groups also use stride 2 on both branches.

# from tf.keras.layers import Input, add, GlobalAveragePooling2D, Dense
#from tf.keras.models import Model

inputs = tf.keras.layers.Input(shape=(32, 32, 3))
# Stem: 3 -> 16 channels, no bias.
x1 = tf.keras.layers.Conv2D(16 ,(3, 3),padding='same',kernel_initializer=initializer,use_bias=False)(inputs)   #32x32 

# FIRST BLOCK (16 -> 96 channels, 1x1 projection on the shortcut)
blk1,ack1 = WResNetBlock(x1,96) 
ack1add = tf.keras.layers.Conv2D(96, (1, 1), padding='same',kernel_initializer=initializer,use_bias=False)(ack1)
fb1 = tf.keras.layers.add([ack1add,blk1])

# SECOND BLOCK (96 channels, shortcut added directly)
blk2,ack2 = WResNetBlock(fb1,96) 
fb2 = tf.keras.layers.add([blk2,ack2])

# THIRD BLOCK (96 channels, shortcut added directly)
blk3,ack3 = WResNetBlock(fb2,96) 
fb3 = tf.keras.layers.add([blk3,ack3])


# FOURTH BLOCK (96 -> 192 channels, stride 2, strided 1x1 projection on the shortcut)
blk4,ack4 = WResNetBlock(fb3,192,2) 
ack4add = tf.keras.layers.Conv2D(192, (1, 1), padding='same',kernel_initializer=initializer,strides=(2,2),use_bias=False)(ack4)
fb4 = tf.keras.layers.add([blk4,ack4add])


# FIFTH BLOCK (192 channels, shortcut added directly)
blk5,ack5 = WResNetBlock(fb4,192) 
fb5 = tf.keras.layers.add([blk5,ack5])

# SIXTH BLOCK (192 channels, shortcut added directly)
blk6,ack6 = WResNetBlock(fb5,192) 
fb6 = tf.keras.layers.add([blk6,ack6])

# SEVENTH BLOCK (192 -> 384 channels, stride 2, strided 1x1 projection on the shortcut)
blk7,ack7 = WResNetBlock(fb6,384,2) 
ack7add = tf.keras.layers.Conv2D(384, (1, 1), padding='same',kernel_initializer=initializer,strides=(2,2),use_bias=False)(ack7)
fb7 = tf.keras.layers.add([blk7,ack7add])

# EIGHTH BLOCK (384 channels, shortcut added directly)
blk8,ack8 = WResNetBlock(fb7,384) 
fb8 = tf.keras.layers.add([blk8,ack8])

# NINTH BLOCK (384 channels, shortcut added directly)
blk9,ack9 = WResNetBlock(fb8,384) 
fb9 = tf.keras.layers.add([blk9,ack9])

# Final BN -> ReLU on the last residual sum, then global average pooling.
bn_10 = tf.keras.layers.BatchNormalization(momentum=0.9,epsilon=1e-5)(fb9)
ack10 = tf.keras.layers.Activation('relu')(bn_10)
avgpool = tf.keras.layers.GlobalAveragePooling2D()(ack10)

# NOTE(review): the pooled tensor is presumably already 2-D, which would make
# this Flatten a no-op - confirm before removing it.
flatten_layer = tf.keras.layers.Flatten()(avgpool)

# Classifier head: 10 softmax outputs.
fc_layer = tf.keras.layers.Dense(10, activation='softmax')(flatten_layer)

model = tf.keras.models.Model(inputs=inputs, outputs= fc_layer)

In [0]:
# Print the layer-by-layer table (output shapes, parameter counts, connections).
model.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_5 (InputLayer)            [(None, 32, 32, 3)]  0                                            
__________________________________________________________________________________________________
conv2d_88 (Conv2D)              (None, 32, 32, 16)   432         input_5[0][0]                    
__________________________________________________________________________________________________
batch_normalization_76 (BatchNo (None, 32, 32, 16)   64          conv2d_88[0][0]                  
__________________________________________________________________________________________________
activation_76 (Activation)      (None, 32, 32, 16)   0           batch_normalization_76[0][0]     
______________________________________________________________________________________________

In [0]:
# #from one_cycle_lr import LRFinder
# from one_cycle_lr_tf import LRFinder
# num_samples= train_features.shape[0]
# batch_size =512
# num_epoch=50
# max_lr=0.05

#Best LR would be 0.01

In [0]:
# def truncate(n, decimals=0):
#     multiplier = 10 ** decimals
#     return int(n * multiplier) / multiplier

In [0]:
## Triangular learning-rate schedule sized for a 30-epoch run (epoch indices 0-29):
## linear warm-up from base_lr to MAX_LR, then linear decay back to base_lr.

MAX_LR= 0.05
base_lr = 0.01
WARMUP_EPOCHS = 11  # epochs spent ramping base_lr -> MAX_LR
DECAY_EPOCHS = 18   # epochs spent decaying MAX_LR -> base_lr

def lr_func(epoch,lr):
  """Return the learning rate for a given 0-based epoch index.

  Args:
    epoch: 0-based epoch index.
    lr: Current learning rate; accepted for the (epoch, lr) scheduler
      signature but ignored, since the schedule depends only on `epoch`.

  Returns:
    The scheduled learning rate rounded to 5 decimals. The decay keeps its
    slope past WARMUP_EPOCHS + DECAY_EPOCHS but is clamped at 0 so that a
    longer run never receives a negative learning rate.
  """
  lr_span = MAX_LR - base_lr
  if epoch < WARMUP_EPOCHS:
    scheduled = base_lr + lr_span * epoch / WARMUP_EPOCHS
  else:
    scheduled = MAX_LR - lr_span * (epoch - WARMUP_EPOCHS) / DECAY_EPOCHS
  scheduled = max(round(scheduled, 5), 0.0)
  print("final lr ",scheduled)
  return scheduled
  

In [0]:
# SGD with momentum; the learning rate is left at the optimizer's default here.
opt = tf.keras.optimizers.SGD(momentum=0.9)
model.compile(
    optimizer=opt,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [0]:
# num_samples= train_features.shape[0]
# batch_size =128
# num_epoch=24
# max_lr=0.1

# from one_cycle_lr_tf import OneCycleLR

# lr_manager = OneCycleLR(num_samples, num_epoch, batch_size, max_lr,
#                         end_percentage=0.1, scale_percentage=None,
#                         maximum_momentum=0.95, minimum_momentum=0.85)

# opt = tf.keras.optimizers.SGD()
# model.compile(optimizer=opt , loss='categorical_crossentropy', metrics=['accuracy'])
   

In [0]:
filepath = "Resnet-13-test1.hdf5"
from tensorflow.keras.callbacks import ModelCheckpoint,LearningRateScheduler

# Save the model to `filepath` only when validation accuracy improves.
checkpoint = ModelCheckpoint(filepath, monitor='val_acc', verbose=1, save_best_only=True, mode='max')
# Set the learning rate from lr_func at the start of every epoch.
lr_scheduler = LearningRateScheduler(lr_func, verbose=1)

##Train the model
model_info = model.fit_generator(
    train_generator,
    # Integer number of batches per pass, taken from the generator itself
    # instead of a hard-coded dataset size and batch size.
    steps_per_epoch=len(train_generator),
    epochs=30,
    validation_data=test_generator,
    verbose=1,
    callbacks=[checkpoint, lr_scheduler],
)


# model_info = model.fit_generator(train_generator,
#                                  steps_per_epoch=np.ceil(50000/128), epochs=24,  
#                                  validation_data = test_generator, verbose=1,callbacks=[checkpoint,lr_manager])


final lr  0.01

Epoch 00001: LearningRateScheduler reducing learning rate to 0.01.
Epoch 1/30
Epoch 00001: val_acc improved from -inf to 0.54090, saving model to Resnet-13-test1.hdf5
final lr  0.01364

Epoch 00002: LearningRateScheduler reducing learning rate to 0.01364.
Epoch 2/30
Epoch 00002: val_acc improved from 0.54090 to 0.66360, saving model to Resnet-13-test1.hdf5
final lr  0.01727

Epoch 00003: LearningRateScheduler reducing learning rate to 0.01727.
Epoch 3/30
Epoch 00003: val_acc improved from 0.66360 to 0.76730, saving model to Resnet-13-test1.hdf5
final lr  0.02091

Epoch 00004: LearningRateScheduler reducing learning rate to 0.02091.
Epoch 4/30
Epoch 00004: val_acc improved from 0.76730 to 0.78050, saving model to Resnet-13-test1.hdf5
final lr  0.02455

Epoch 00005: LearningRateScheduler reducing learning rate to 0.02455.
Epoch 5/30
Epoch 00005: val_acc improved from 0.78050 to 0.78360, saving model to Resnet-13-test1.hdf5
final lr  0.02818

Epoch 00006: LearningRateSched