<a href="https://colab.research.google.com/github/roulupen/EVAAssignments/blob/master/Assignment13/Assignment_13.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import time, math
from tqdm import tqdm_notebook as tqdm

import tensorflow as tf
import tensorflow.contrib.eager as tfe

The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.



In [2]:
from google.colab import drive
drive.mount('../content/drive/', force_remount=True)

Mounted at ../content/drive/


In [3]:
import os
os.listdir('../content/drive/My Drive/EVA/')

['cifar10_best.h5',
 'cifar10_best1.h5',
 'Assignment13',
 'Assignment13.h5',
 'assignment13_weights.h5']

In [0]:
tf.enable_eager_execution()

In [0]:
BATCH_SIZE =  128#@param {type:"integer"}
MOMENTUM = 0.9 #@param {type:"number"}
LEARNING_RATE = 0.01 #@param {type:"number"}
WEIGHT_DECAY = 5e-4 #@param {type:"number"}
EPOCHS =  300#@param {type:"integer"}
TARGET_ACCURACY = 0.9#@param {type: "number"}

https://mc.ai/tutorial-1-cifar10-with-google-colabs-free-gpu%E2%80%8A-%E2%80%8A92-5/

In [0]:
def init_pytorch(shape, dtype=tf.float32, partition_info=None):
  fan = np.prod(shape[:-1])
  bound = 1 / math.sqrt(fan)
  return tf.random.uniform(shape, minval=-bound, maxval=bound, dtype=dtype)

In [0]:
class ConvBN(tf.keras.Model):
  def __init__(self, c_out):
    super().__init__()
    self.conv = tf.keras.layers.Conv2D(filters=c_out, kernel_size=3, padding="SAME", kernel_initializer=init_pytorch, use_bias=False)
    self.bn = tf.keras.layers.BatchNormalization(momentum=0.9, epsilon=1e-5)
    self.drop = tf.keras.layers.Dropout(0.05)

  def call(self, inputs):
    return tf.nn.relu(self.bn(self.drop(self.conv(inputs))))

In [0]:
class ResBlk(tf.keras.Model):
  def __init__(self, c_out, pool, res = False):
    super().__init__()
    self.conv_bn = ConvBN(c_out)
    self.pool = pool
    self.res = res
    if self.res:
      self.res1 = ConvBN(c_out)
      self.res2 = ConvBN(c_out)

  def call(self, inputs):
    h = self.pool(self.conv_bn(inputs))
    if self.res:
      h = h + self.res2(self.res1(h))
    return h

In [0]:
class DavidNet(tf.keras.Model):
  def __init__(self, c=64, weight=0.125):
    super().__init__()
    pool = tf.keras.layers.MaxPooling2D()
    self.init_conv_bn = ConvBN(c)
    self.blk1 = ResBlk(c*2, pool, res = True)
    self.blk2 = ResBlk(c*4, pool)
    self.blk3 = ResBlk(c*8, pool, res = True)
    self.pool = tf.keras.layers.GlobalMaxPool2D()
    self.linear = tf.keras.layers.Dense(10, kernel_initializer=init_pytorch, use_bias=False)
    self.weight = weight

  def call(self, x, y):
    h = self.pool(self.blk3(self.blk2(self.blk1(self.init_conv_bn(x)))))
    h = self.linear(h) * self.weight
    ce = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=h, labels=y)
    loss = tf.reduce_sum(ce)
    correct = tf.reduce_sum(tf.cast(tf.math.equal(tf.argmax(h, axis = 1), y), tf.float32))
    return loss, correct

### Resnet18 model

In [0]:
class Resnet18(tf.keras.Model):
    def __init__(self, c=64, weight=0.125):
        super().__init__()
        pool = tf.keras.layers.MaxPooling2D()
        self.init_conv_bn = ConvBN(c)
        self.b1 = ResBlk(c*2, pool, res = True)
        self.b2 = ResBlk(c*2, pool, res = True)
        self.b3 = ResBlk(c*2, pool, res = True)
        self.b4 = ResBlk(c*2, pool, res = True)
        self.avg_pool = tf.keras.layers.GlobalAveragePooling2D()
        self.linear = tf.keras.layers.Dense(10, kernel_initializer=init_pytorch, use_bias=False)
        self.weight = weight

    def call(self, x, y) :
        h = self.avg_pool(self.b4(self.b3(self.b2(self.b1(self.init_conv_bn(x))))))
        h = self.linear(h) * self.weight
        ce = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=h, labels=y)
        loss = tf.reduce_sum(ce)
        correct = tf.reduce_sum(tf.cast(tf.math.equal(tf.argmax(h, axis = 1), y), tf.float32))
        return loss, correct

### Using given mean and standard deviation to normalize the input data

In [0]:
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar10.load_data()
len_train, len_test = len(x_train), len(x_test)
y_train = y_train.astype('int64').reshape(len_train)
y_test = y_test.astype('int64').reshape(len_test)


#train_mean = np.mean(x_train, axis=(0,1,2))
#train_std = np.std(x_train, axis=(0,1,2))

# Setting mean and standard deviation of the dataset as provided in the assignment
train_mean =np.array([0.4914, 0.4822, 0.4465])
train_std = np.array([0.2023, 0.1994, 0.2010])

normalize = lambda x: ((x - train_mean) / train_std).astype('float32') # todo: check here
pad4 = lambda x: np.pad(x, [(0, 0), (4, 4), (4, 4), (0, 0)], mode='reflect')

x_train = normalize(pad4(x_train))
x_test = normalize(x_test)

### Function for cyclic learning rate implementation

In [0]:
from tensorflow.python.ops import math_ops
from tensorflow.python.framework import ops

def clr_custom(global_step, step_size = 500., max_lr = 0.1, learning_rate=0.01):
    
    learning_rate = ops.convert_to_tensor(learning_rate, name="learning_rate")
    dtype = learning_rate.dtype
    global_step = math_ops.cast(global_step, dtype)
    step_size = math_ops.cast(step_size, dtype)

    double_step = math_ops.multiply(2., step_size)
    global_div_double_step = math_ops.divide(global_step, double_step)
    cycle = math_ops.floor(math_ops.add(1., global_div_double_step))
    
    # computing: x = abs( global_step / step_size – 2 * cycle + 1 )
    double_cycle = math_ops.multiply(2., cycle)
    global_div_step = math_ops.divide(global_step, step_size)
    tmp = math_ops.subtract(global_div_step, double_cycle)  
    x = math_ops.abs(math_ops.add(1., tmp))

    # computing: clr = learning_rate + ( max_lr – learning_rate ) * max( 0, 1 - x )
    a1 = math_ops.maximum(0., math_ops.subtract(1., x))
    a2 = math_ops.subtract(max_lr, learning_rate)
    clr = math_ops.multiply(a1, a2)
    return math_ops.add(clr, learning_rate)

 

*   Step Size = 500, So one cycle = 1000 batches 
*   The MomentumOptimizer uses SGD with mometum, so we are using same optimizer instead of changing it to some other optimizer.
*   Data Augmentation does a random corp of 32x32x3 and random left to right flip

In [0]:
model = Resnet18()

STEP_SIZE = 500 # 100 batches = 1 step, 200 batches = 1 Cycle
batches_per_epoch = len_train//BATCH_SIZE + 1

global_step = None
global_step = tf.train.get_or_create_global_step()
lr_func = lambda: clr_custom(global_step, STEP_SIZE, LEARNING_RATE, LEARNING_RATE/10)
opt = tf.train.MomentumOptimizer(lr_func, momentum=MOMENTUM, use_nesterov=True)
data_aug = lambda x, y: (tf.image.random_flip_left_right(tf.random_crop(x, [32, 32, 3])), y)

#### Here I'm training the model for 300 epochs but once the valdiation accuracy reaches target accuracy(90%) it'll stop training the network. And after each epoch it saves the best model.



In [14]:
t = time.time()
test_set = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(BATCH_SIZE)
best_accuracy = 0.0

for epoch in range(EPOCHS):
  train_loss = test_loss = train_acc = test_acc = 0.0
  train_set = tf.data.Dataset.from_tensor_slices((x_train, y_train)).map(data_aug).shuffle(len_train).batch(BATCH_SIZE).prefetch(1)

  tf.keras.backend.set_learning_phase(1)
  for (x, y) in tqdm(train_set):
    with tf.GradientTape() as tape:
      loss, correct = model(x, y)

    var = model.trainable_variables
    grads = tape.gradient(loss, var)
    for g, v in zip(grads, var):
      g += v * WEIGHT_DECAY * BATCH_SIZE
    opt.apply_gradients(zip(grads, var), global_step=global_step)

    train_loss += loss.numpy()
    train_acc += correct.numpy()

  tf.keras.backend.set_learning_phase(0)
  for (x, y) in test_set:
    loss, correct = model(x, y)
    test_loss += loss.numpy()
    test_acc += correct.numpy()

  val_accuracy = test_acc / len_test
  print('epoch:', epoch+1,' lr: ',  clr_custom(global_step, STEP_SIZE, LEARNING_RATE, LEARNING_RATE/10).numpy(),'train loss:', train_loss / len_train, 'train acc:', train_acc / len_train, 'val loss:', test_loss / len_test, 'val acc:', test_acc / len_test, 'time:', time.time() - t)
  if best_accuracy < val_accuracy:
    print('best accuracy improved from: ', best_accuracy, ' to: ', val_accuracy)
    best_accuracy = val_accuracy

    # Save weights to disk
    model.save_weights('../content/drive/My Drive/EVA/assignment13_weights.h5')
  else:
    print('best accuracy: ', best_accuracy)

  if val_accuracy >= TARGET_ACCURACY:
    print('validation accuracy has reached', (TARGET_ACCURACY * 100 ) , '% , stopping the model training')
    break




HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


epoch: 1  lr:  0.008038 train loss: 1.3258672625732422 train acc: 0.5116 val loss: 1.492884727859497 val acc: 0.5322 time: 61.223612785339355
best accuracy improved from:  0.0  to:  0.5322


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


epoch: 2  lr:  0.004924 train loss: 0.8226072402954101 train acc: 0.71044 val loss: 0.6991150637149811 val acc: 0.7571 time: 120.35376143455505
best accuracy improved from:  0.5322  to:  0.7571


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


epoch: 3  lr:  0.0041139997 train loss: 0.5891566269683838 train acc: 0.7955 val loss: 0.6361806688308715 val acc: 0.7807 time: 179.42515087127686
best accuracy improved from:  0.7571  to:  0.7807


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


epoch: 4  lr:  0.008847999 train loss: 0.6079280970001221 train acc: 0.78936 val loss: 0.7674091161727905 val acc: 0.7494 time: 237.82302284240723
best accuracy:  0.7807


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


epoch: 5  lr:  0.0018099992 train loss: 0.505644806060791 train acc: 0.8241 val loss: 0.4306128664493561 val acc: 0.8553 time: 295.86291432380676
best accuracy improved from:  0.7807  to:  0.8553


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


epoch: 6  lr:  0.007227999 train loss: 0.42215885936737063 train acc: 0.85526 val loss: 0.6630589408874512 val acc: 0.7796 time: 354.56682085990906
best accuracy:  0.8553


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


epoch: 7  lr:  0.0057340004 train loss: 0.4793232102584839 train acc: 0.83382 val loss: 0.49658905544281007 val acc: 0.8263 time: 412.6019330024719
best accuracy:  0.8553


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


epoch: 8  lr:  0.0033040005 train loss: 0.34911906795501707 train acc: 0.87838 val loss: 0.4220309110164642 val acc: 0.8582 time: 471.5020167827606
best accuracy improved from:  0.8553  to:  0.8582


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


epoch: 9  lr:  0.009657999 train loss: 0.3858923077011108 train acc: 0.86766 val loss: 0.6313067604064941 val acc: 0.7966 time: 530.1520590782166
best accuracy:  0.8582


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


epoch: 10  lr:  0.0026199985 train loss: 0.36844080623626707 train acc: 0.8714 val loss: 0.3575267066001892 val acc: 0.8776 time: 588.3263425827026
best accuracy improved from:  0.8582  to:  0.8776


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


epoch: 11  lr:  0.006418002 train loss: 0.27949102626800537 train acc: 0.90224 val loss: 0.531385116481781 val acc: 0.818 time: 646.8777165412903
best accuracy:  0.8776


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


epoch: 12  lr:  0.0065440014 train loss: 0.3600914179992676 train acc: 0.87576 val loss: 0.4375645916938782 val acc: 0.852 time: 704.8324356079102
best accuracy:  0.8776


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


epoch: 13  lr:  0.0024940032 train loss: 0.266445078086853 train acc: 0.9082 val loss: 0.3221746654510498 val acc: 0.8908 time: 763.3265266418457
best accuracy improved from:  0.8776  to:  0.8908


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


epoch: 14  lr:  0.009531999 train loss: 0.2790169612503052 train acc: 0.90358 val loss: 0.528274972486496 val acc: 0.8278 time: 821.5416920185089
best accuracy:  0.8908


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


epoch: 15  lr:  0.0034300042 train loss: 0.2972776396560669 train acc: 0.89666 val loss: 0.33477204046249387 val acc: 0.8865 time: 879.0978229045868
best accuracy:  0.8908


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


epoch: 16  lr:  0.005608001 train loss: 0.20928073644638062 train acc: 0.92692 val loss: 0.3645399008750915 val acc: 0.8777 time: 937.1556060314178
best accuracy:  0.8908


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


epoch: 17  lr:  0.007354003 train loss: 0.27947034919738767 train acc: 0.90242 val loss: 0.3529918699860573 val acc: 0.8822 time: 995.1121320724487
best accuracy:  0.8908


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


epoch: 18  lr:  0.0016840019 train loss: 0.21973351705551147 train acc: 0.92256 val loss: 0.30220756340026855 val acc: 0.9 time: 1053.0510358810425
best accuracy improved from:  0.8908  to:  0.9
validation accuracy has reached 90.0 % , stopping the model training
