# Assignment 14
### Group: Hardesh, Jyoti, Narahari and Santu (in alphabetical order)

## Tesla V100 

In [0]:
!nvidia-smi

Mon Aug 26 18:50:45 2019       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 418.67       Driver Version: 418.67       CUDA Version: 10.1     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|   0  Tesla V100-SXM2...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   32C    P0    23W / 300W |     80MiB / 16130MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Processes:                                                       GPU Memory |
|  GPU       PID   Type   Process name                             Usage    

## Importing libraries and modules

In [0]:
import numpy as np
import time, math
from tqdm import tqdm_notebook as tqdm

import tensorflow as tf
import tensorflow.contrib.eager as tfe
import matplotlib.pyplot as plt
!git clone https://github.com/santuhazra1/Imp_Lib.git /tmp/Imp_Lib
!mv /tmp/Imp_Lib/Auto_Augment/auto_augment.py auto_augment.py 
!rm -r /tmp/Imp_Lib
from auto_augment import AutoAugment

Cloning into '/tmp/Imp_Lib'...
remote: Enumerating objects: 16, done.[K
remote: Counting objects: 100% (16/16), done.[K
remote: Compressing objects: 100% (8/8), done.[K
remote: Total 16 (delta 3), reused 0 (delta 0), pack-reused 0[K
Unpacking objects: 100% (16/16), done.
Checking connectivity... done.


## Enabling eager execution so that operations are executed as soon as they are defined

In [0]:
# Turn on eager execution so TensorFlow ops run immediately; the training loop
# further down uses tf.GradientTape and reads results back with .numpy().
tf.enable_eager_execution()

## Parameters

In [0]:
# Momentum coefficient handed to tf.train.MomentumOptimizer when the optimizer is built.
MOMENTUM = 0.9 #@param {type:"number"}
# Weight-decay factor. NOTE(review): in this notebook it is only referenced by the
# commented-out lines in the training loop, so it currently has no effect on training.
WEIGHT_DECAY = 5e-4 #@param {type:"number"}

In [0]:
def init_pytorch(shape, dtype=tf.float16, partition_info=None):
  """Kernel initializer drawing uniformly from [-1/sqrt(fan), 1/sqrt(fan)).

  `fan` is the product of every dimension of `shape` except the last one.
  The function name suggests this imitates PyTorch's default layer
  initialisation (not verified here).

  Args:
    shape: shape of the variable to initialise.
    dtype: dtype of the returned tensor (defaults to tf.float16).
    partition_info: accepted for initializer-signature compatibility; unused.

  Returns:
    A tensor of the requested shape and dtype with uniform random values.
  """
  fan_in = np.prod(shape[:-1])
  limit = 1 / math.sqrt(fan_in)
  return tf.random.uniform(shape, minval=-limit, maxval=limit, dtype=dtype)

## Model layers

In [0]:
class ConvBN(tf.keras.Model):
  """3x3 convolution followed by batch normalisation and ReLU."""

  def __init__(self, c_out):
    """Build the layers.

    Args:
      c_out: number of output channels (filters) of the convolution.
    """
    super().__init__()
    # Bias is omitted on the convolution; batch normalisation follows directly.
    self.conv = tf.keras.layers.Conv2D(
        filters=c_out,
        kernel_size=3,
        padding="SAME",
        kernel_initializer=init_pytorch,
        use_bias=False,
        dtype=tf.float16,
    )
    self.bn = tf.keras.layers.BatchNormalization(momentum=0.9, epsilon=1e-5)

  def call(self, inputs):
    """Apply conv -> batch norm -> ReLU to `inputs` and return the result."""
    features = self.conv(inputs)
    normalised = self.bn(features)
    return tf.nn.relu(normalised)

## Residual Block

In [0]:
class ResBlk(tf.keras.Model):
  """ConvBN + pooling stage with an optional two-layer residual branch."""

  def __init__(self, c_out, pool, res = False):
    """Build the block.

    Args:
      c_out: number of output channels for every ConvBN in the block.
      pool: callable pooling layer applied right after the first ConvBN.
      res: when True, add a residual branch made of two further ConvBN layers.
    """
    super().__init__()
    self.conv_bn = ConvBN(c_out)
    self.pool = pool
    self.res = res
    # The residual layers are only created when they will actually be used.
    if self.res:
      self.res1 = ConvBN(c_out)
      self.res2 = ConvBN(c_out)

  def call(self, inputs):
    """Return pool(conv_bn(inputs)), plus the residual branch if enabled."""
    pooled = self.pool(self.conv_bn(inputs))
    if not self.res:
      return pooled
    # Skip connection: the branch output is added back onto its own input.
    return pooled + self.res2(self.res1(pooled))

## Main Model

In [0]:
class DavidNet(tf.keras.Model):
  """Small residual CNN that returns the summed loss and the number of hits.

  Layout: ConvBN(c) -> ResBlk(c, residual) -> ResBlk(2c) -> ResBlk(4c, residual)
  -> global max pooling -> bias-free Dense(10) -> multiplication by `weight`.
  """

  def __init__(self, c=64, weight=0.125):
    """Build the network.

    Args:
      c: base channel width; the three blocks use c, 2*c and 4*c channels.
      weight: constant factor applied to the logits before the loss.
    """
    super().__init__()
    # One MaxPooling2D instance (default arguments) is shared by all three blocks.
    pool = tf.keras.layers.MaxPooling2D()
    self.init_conv_bn = ConvBN(c)
    self.blk1 = ResBlk(c, pool, res = True)
    self.blk2 = ResBlk(c*2, pool)
    self.blk3 = ResBlk(c*4, pool, res = True)
    self.pool = tf.keras.layers.GlobalMaxPool2D()
    self.linear = tf.keras.layers.Dense(10, kernel_initializer=init_pytorch, use_bias=False)
    self.weight = weight

  def call(self, x, y):
    """Run the forward pass and score it against the labels.

    Args:
      x: batch of input images.
      y: integer class labels for the batch (compared against an int64 argmax).

    Returns:
      (loss, correct): the cross-entropy summed over the batch and the number
      of correctly classified samples, both as float32 scalars.
    """
    h = self.pool(self.blk3(self.blk2(self.blk1(self.init_conv_bn(x)))))
    # Do the softmax / loss / counting in float32. With float16 activations the
    # reductions below would run in half precision, where integers above 2048
    # are not exactly representable and sums overflow at 65504.
    logits = tf.cast(self.linear(h) * self.weight, tf.float32)
    ce = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=y)
    loss = tf.reduce_sum(ce)
    hits = tf.math.equal(tf.argmax(logits, axis = 1), y)
    correct = tf.reduce_sum(tf.cast(hits, tf.float32))
    return loss, correct

## Normalization of Data

In [0]:
# Load CIFAR-10 and flatten the label arrays to 1-D int64 vectors.
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar10.load_data()
len_train, len_test = len(x_train), len(x_test)
y_train = y_train.astype('int64').reshape(len_train)
y_test = y_test.astype('int64').reshape(len_test)

# Statistics over every axis except the last one, computed on the training set only.
train_mean = x_train.mean(axis=(0, 1, 2))
train_std = x_train.std(axis=(0, 1, 2))


def normalize(x):
  """Standardise `x` with the training-set mean/std and cast to float16."""
  return ((x - train_mean) / train_std).astype('float16')


def pad4(x):
  """Reflect-pad axes 1 and 2 of a 4-D array by 4 entries on each side."""
  return np.pad(x, [(0, 0), (4, 4), (4, 4), (0, 0)], mode='reflect')


# x_train is deliberately left un-normalised here: the augmentation cell pads,
# crops and augments the raw images first and only then calls normalize().
x_test = normalize(x_test)

## Data Augmentation

In [0]:
class RandomCrop(object):
    """Cut a randomly positioned square window out of every image in a batch."""

    def __init__(self, crop_size=32):
        """Store the side length (in pixels) of the square crop."""
        self.crop_size = crop_size

    def __call__(self, img):
        """Crop each image of `img` independently.

        Args:
            img: array indexed as (image, row, column, channel).

        Returns:
            Array of crops, one per input image, each crop_size x crop_size.
        """
        side = self.crop_size
        patches = []
        for idx in range(img.shape[0]):
            sample = np.array(img[idx])
            rows, cols = sample.shape[:2]
            # The column offset is drawn before the row offset; keeping this
            # order keeps results reproducible for a given NumPy seed.
            left = np.random.randint(0, cols - side + 1)
            top = np.random.randint(0, rows - side + 1)
            patches.append(sample[top:top + side, left:left + side, :])
        return np.array(patches)


randomcrop = RandomCrop(32)

In [0]:
from PIL import Image
def AutoAug(img):
  """Run the imported AutoAugment transform over every image of a batch.

  Args:
    img: array of images indexed along axis 0; each slice must be something
      PIL's Image.fromarray accepts.

  Returns:
    Array with the same shape and dtype as `img` holding the augmented images.
  """
  policy = AutoAugment()
  augmented = np.zeros_like(img)
  for idx in range(img.shape[0]):
    # The transform is called on a PIL image; its result is written straight
    # back into the NumPy output buffer.
    augmented[idx] = policy(Image.fromarray(img[idx]))
  return augmented

In [0]:
from PIL import Image
class Cutout(object):
    """Randomly blank out a square patch of images, filling it with the image mean."""

    def __init__(self, length=10):
        """Store the side length (in pixels) of the square patch."""
        self.length = length

    def __call__(self, p, img):
        """Apply cutout to each image of a batch with probability `p`.

        Args:
            p: probability threshold; an image is modified unless its uniform
                draw from [0, 1) is strictly greater than `p`.
            img: array indexed as (image, row, column, channel). It is not
                modified; any NumPy dtype is accepted.

        Returns:
            Array with the same shape and dtype as `img`.
        """
        cutout_im = np.zeros_like(img)
        for i in range(img.shape[0]):
            # Draw order (rand, then top, then left) is kept stable so results
            # stay reproducible for a given NumPy seed.
            p_1 = np.random.rand()
            im = np.array(img[i])  # private copy, so the caller's data is untouched
            if p_1 > p:
                cutout_im[i] = im
                continue

            mask_val = im.mean()

            # The patch's top-left corner is sampled anywhere in the image, so
            # patches near the bottom/right border are clipped by the slice.
            top = np.random.randint(0, im.shape[0])
            left = np.random.randint(0, im.shape[1])
            bottom = top + self.length
            right = left + self.length

            im[top:bottom, left:right, :] = mask_val

            # Written back directly: the former PIL round trip added nothing
            # and failed for dtypes PIL cannot represent.
            cutout_im[i] = im
        return cutout_im

cutout=Cutout()

In [0]:
print("Augmentation begin....")
# Pre-compute 32 independently augmented copies of the training set and publish
# them as module-level names x_train0 ... x_train31; the training loop picks
# one copy per epoch by that name.
for i in range(32):
  # Reflect-pad, then take a random 32x32 crop of every image.
  aug_img = randomcrop(pad4(x_train))
  # Cutout on roughly half of the images, followed by AutoAugment.
  aug_img = AutoAug(cutout(0.5, aug_img))
  # Normalisation comes last and yields float16 data.
  aug_img = normalize(aug_img)
  globals()['x_train%s' % i] = aug_img
print("Augmentation completed")

Augmentation begin....
Augmentation completed


## Parameters

In [0]:
# Number of samples per batch for both the training and the test dataset.
BATCH_SIZE = 512 #@param {type:"integer"}
# Peak value of the piecewise-linear learning-rate schedule (lr_func later divides by BATCH_SIZE).
LEARNING_RATE = 0.45 #@param {type:"number"}
# Number of training epochs. Each epoch reads its own pre-augmented copy x_train<epoch>;
# the augmentation cell creates 32 such copies, so this must not exceed 32.
EPOCHS = 26 #@param {type:"integer"}

In [0]:
model = DavidNet()
# Optimizer steps per epoch = ceil(len_train / BATCH_SIZE). The previous
# `len_train//BATCH_SIZE + 1` counted one step too many whenever BATCH_SIZE
# divides len_train exactly, which stretched the learning-rate schedule.
batches_per_epoch = (len_train + BATCH_SIZE - 1) // BATCH_SIZE

# Piecewise-linear schedule over (fractional) epochs t: start at LEARNING_RATE/5,
# rise to LEARNING_RATE at (EPOCHS+1)//5, fall back to LEARNING_RATE/5 at
# int((EPOCHS+1)*0.7) and finish at 0.004 at EPOCHS. A commented-out variant of
# this line used 0.02 as the final value.
lr_schedule = lambda t: np.interp([t], [0, (EPOCHS+1)//5, int((EPOCHS+1)*0.7), EPOCHS], [LEARNING_RATE/5.0, LEARNING_RATE, LEARNING_RATE/5.0, 0.004])[0]

global_step = tf.train.get_or_create_global_step()
# The model returns the loss summed over the batch, so the step size is divided
# by BATCH_SIZE to make the update correspond to a per-sample average.
lr_func = lambda: lr_schedule(global_step/batches_per_epoch)/BATCH_SIZE
opt = tf.train.MomentumOptimizer(lr_func, momentum=MOMENTUM, use_nesterov=True)

In [0]:
# Wall-clock reference; the time printed after each epoch is cumulative from here.
t = time.time()
# The test set is batched once and reused unchanged for every epoch.
test_set = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(BATCH_SIZE)

for epoch in range(EPOCHS):
  # Per-epoch accumulators: summed loss and number of correct predictions.
  train_loss = test_loss = train_acc = test_acc = 0.0
  # Each epoch trains on its own pre-augmented copy of the data (x_train0, x_train1, ...),
  # looked up by name among the module-level variables.
  train_set = tf.data.Dataset.from_tensor_slices((globals()['x_train%s' % epoch], y_train)).shuffle(len_train).batch(BATCH_SIZE).prefetch(1)

  # Keras learning phase 1 = training.
  tf.keras.backend.set_learning_phase(1)
  for (x, y) in tqdm(train_set):
    with tf.GradientTape() as tape:
      loss, correct = model(x, y)

    var = model.trainable_variables
    grads = tape.gradient(loss, var)
# Weight decay is disabled: the two lines below are commented out, so
# WEIGHT_DECAY currently has no effect on the update.
#     for g, v in zip(grads, var):
#       g += v * WEIGHT_DECAY * BATCH_SIZE
    # Applying the gradients also advances global_step, which drives lr_func.
    opt.apply_gradients(zip(grads, var), global_step=global_step)

    train_loss += loss.numpy()
    train_acc += correct.numpy()

  # Keras learning phase 0 = inference, used for the evaluation pass.
  tf.keras.backend.set_learning_phase(0)
  for (x, y) in test_set:
    loss, correct = model(x, y)
    test_loss += loss.numpy()
    test_acc += correct.numpy()
    
  # Sums are divided by the dataset sizes, giving per-sample loss and accuracy.
  print('epoch:', epoch+1, 'lr:', lr_schedule(epoch+1), 'train loss:', train_loss / len_train, 'train acc:', train_acc / len_train, 'val loss:', test_loss / len_test, 'val acc:', test_acc / len_test, 'time:', time.time() - t)

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


epoch: 1 lr: 0.16199999999999998 train loss: 1.60712 train acc: 0.41324 val loss: 1.210425 val acc: 0.5646 time: 10.344749927520752


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


epoch: 2 lr: 0.23399999999999999 train loss: 1.056255 train acc: 0.62504 val loss: 0.874525 val acc: 0.6913 time: 16.153656482696533


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


epoch: 3 lr: 0.30599999999999994 train loss: 0.881935 train acc: 0.69084 val loss: 0.966175 val acc: 0.6825 time: 21.889267206192017


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


epoch: 4 lr: 0.378 train loss: 0.8053775 train acc: 0.71752 val loss: 0.813325 val acc: 0.736 time: 27.562443256378174


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


epoch: 5 lr: 0.45 train loss: 0.7329 train acc: 0.74412 val loss: 0.7316375 val acc: 0.7582 time: 33.272096395492554


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


epoch: 6 lr: 0.42230769230769233 train loss: 0.65983 train acc: 0.77044 val loss: 0.5133125 val acc: 0.8258 time: 39.074808835983276


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


epoch: 7 lr: 0.39461538461538465 train loss: 0.598995 train acc: 0.7911 val loss: 0.4624625 val acc: 0.8421 time: 44.993884563446045


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


epoch: 8 lr: 0.3669230769230769 train loss: 0.553205 train acc: 0.80706 val loss: 0.452125 val acc: 0.8464 time: 50.9995174407959


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


epoch: 9 lr: 0.3392307692307692 train loss: 0.52336 train acc: 0.81836 val loss: 0.443525 val acc: 0.8486 time: 56.97211694717407


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


epoch: 10 lr: 0.31153846153846154 train loss: 0.4786075 train acc: 0.83312 val loss: 0.3979125 val acc: 0.8661 time: 62.70196795463562


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


epoch: 11 lr: 0.28384615384615386 train loss: 0.4533475 train acc: 0.8409 val loss: 0.40330625 val acc: 0.8663 time: 68.61537742614746


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


epoch: 12 lr: 0.2561538461538462 train loss: 0.4250125 train acc: 0.85186 val loss: 0.3808 val acc: 0.8738 time: 74.54493379592896


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


epoch: 13 lr: 0.2284615384615385 train loss: 0.40548 train acc: 0.85878 val loss: 0.4031625 val acc: 0.869 time: 80.47472476959229


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


epoch: 14 lr: 0.2007692307692308 train loss: 0.3840475 train acc: 0.86608 val loss: 0.35586875 val acc: 0.8808 time: 86.51752805709839


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


epoch: 15 lr: 0.17307692307692313 train loss: 0.36648375 train acc: 0.8727 val loss: 0.33134375 val acc: 0.8903 time: 92.46221828460693


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


epoch: 16 lr: 0.14538461538461545 train loss: 0.3469125 train acc: 0.87928 val loss: 0.29551875 val acc: 0.9012 time: 98.46751737594604


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


epoch: 17 lr: 0.11769230769230771 train loss: 0.3253125 train acc: 0.88596 val loss: 0.29120625 val acc: 0.905 time: 104.62004232406616


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


epoch: 18 lr: 0.09 train loss: 0.31056625 train acc: 0.89122 val loss: 0.298575 val acc: 0.9039 time: 110.59787344932556


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


epoch: 19 lr: 0.07925 train loss: 0.295855 train acc: 0.898 val loss: 0.274475 val acc: 0.9113 time: 116.56584668159485


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


epoch: 20 lr: 0.0685 train loss: 0.29015375 train acc: 0.90016 val loss: 0.27548125 val acc: 0.9081 time: 122.53467845916748


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


epoch: 21 lr: 0.057749999999999996 train loss: 0.2825 train acc: 0.90178 val loss: 0.2674875 val acc: 0.9125 time: 128.61970829963684


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


epoch: 22 lr: 0.047 train loss: 0.273785 train acc: 0.90518 val loss: 0.27408125 val acc: 0.9124 time: 134.77852654457092


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


epoch: 23 lr: 0.036250000000000004 train loss: 0.26356125 train acc: 0.90834 val loss: 0.26581875 val acc: 0.913 time: 140.79869389533997


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


epoch: 24 lr: 0.025499999999999995 train loss: 0.2564625 train acc: 0.91104 val loss: 0.26153125 val acc: 0.9156 time: 146.78879570960999


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


epoch: 25 lr: 0.01475 train loss: 0.25056125 train acc: 0.91292 val loss: 0.25875 val acc: 0.9162 time: 152.73997592926025


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


epoch: 26 lr: 0.004 train loss: 0.24575625 train acc: 0.91608 val loss: 0.256925 val acc: 0.9166 time: 158.63901448249817


## From the above, we can conclude that by the 24th epoch we reached 91.56% validation accuracy in under 147 s

In [0]:
!nvidia-smi

Mon Aug 26 19:06:56 2019       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 418.67       Driver Version: 418.67       CUDA Version: 10.1     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|   0  Tesla V100-SXM2...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   42C    P0    50W / 300W |  15874MiB / 16130MiB |     10%      Default |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Processes:                                                       GPU Memory |
|  GPU       PID   Type   Process name                             Usage    