<a href="https://colab.research.google.com/github/sarvan0506/AI/blob/Dev/Project13/Assignment13Baseline.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import tensorflow as tf
tf.enable_eager_execution()
import numpy as np
import time, math
from tqdm import tqdm_notebook as tqdm

import tensorflow.contrib.eager as tfe

In [0]:
BATCH_SIZE = 128 #@param {type:"integer"}
MOMENTUM = 0.9 #@param {type:"number"}
LEARNING_RATE = 0.4 #@param {type:"number"}
WEIGHT_DECAY = 5e-4 #@param {type:"number"}
EPOCHS =  300#@param {type:"integer"}

In [0]:
def init_pytorch(shape, dtype=tf.float32, partition_info=None):
  fan = np.prod(shape[:-1])
  bound = 1 / math.sqrt(fan)
  return tf.random.uniform(shape, minval=-bound, maxval=bound, dtype=dtype)

In [0]:
class ConvBN(tf.keras.Model):
  def __init__(self, c_out):
    super().__init__()
    self.conv = tf.keras.layers.Conv2D(filters=c_out, kernel_size=3, padding="SAME", kernel_initializer=init_pytorch, use_bias=False)
    self.bn = tf.keras.layers.BatchNormalization()
    #self.drop = tf.keras.layers.Dropout(0.05)

  def call(self, inputs):
    return tf.nn.relu(self.bn(self.conv(inputs)))

In [0]:
class ConvBranch(tf.keras.Model):
  def __init__(self, c_out, res = False):
    super().__init__()
    self.conv = tf.keras.layers.Conv2D(filters=c_out, kernel_size=3, padding="SAME", kernel_initializer=init_pytorch, use_bias=False)
    self.conv_s = tf.keras.layers.Conv2D(filters=c_out, kernel_size=3, padding="SAME", strides=(2,2), kernel_initializer=init_pytorch, use_bias=False)
    self.bn = tf.keras.layers.BatchNormalization()
    self.res = res

  def call(self, inputs):
    prep = tf.nn.relu(self.bn(inputs))
    if self.res:
      shortcut = self.conv_s(prep)
      branch = self.conv(tf.nn.relu(self.bn(self.conv_s(prep))))
    else:
      shortcut = prep
      branch = self.conv(tf.nn.relu(self.bn(self.conv(prep))))

    block = shortcut + branch
    return block

In [0]:
class ResNet18(tf.keras.Model):
    def __init__(self, c=64):
      super().__init__()
      self.init_conv_bn = ConvBN(c)
      self.block1 = ConvBranch(c)
      self.blockl20 = ConvBranch(c*2, res=True)
      self.blockl21 = ConvBranch(c*2)
      self.blockl30 = ConvBranch(c*3, res=True)
      self.blockl31 = ConvBranch(c*3)
      self.blockl40 = ConvBranch(c*4, res=True)
      self.blockl41 = ConvBranch(c*4)

      '''
      self.layer2 = ConvBranch(ConvBranch(c*2), res=True)
      self.layer3 = ConvBranch(ConvBranch(c*4), res=True)
      self.layer4 = ConvBranch(ConvBranch(c*6), res=True)
      '''

      self.pool = tf.keras.layers.GlobalMaxPool2D()
      self.linear = tf.keras.layers.Dense(10, kernel_initializer=init_pytorch, use_bias=False)
      

    def call(self, x, y):
      I = self.init_conv_bn(x)
      L1 = self.block1(self.block1(I))
      L2 = self.blockl21(self.blockl20(L1))
      L3 = self.blockl31(self.blockl30(L2))
      L4 = self.blockl41(self.blockl40(L3))

      h = self.linear(self.pool(L4)) * 0.125

      ce = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=h, labels=y)
      loss = tf.reduce_sum(ce)
      correct = tf.reduce_sum(tf.cast(tf.math.equal(tf.argmax(h, axis = 1), y), tf.float32))
      return loss, correct

In [0]:
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar10.load_data()
len_train, len_test = len(x_train), len(x_test)
y_train = y_train.astype('int64').reshape(len_train)
y_test = y_test.astype('int64').reshape(len_test)

train_mean = np.mean(x_train, axis=(0,1,2))
train_std = np.std(x_train, axis=(0,1,2))

normalize = lambda x: ((x - train_mean) / train_std).astype('float32') # todo: check here
pad4 = lambda x: np.pad(x, [(0, 0), (4, 4), (4, 4), (0, 0)], mode='reflect')

x_train = normalize(pad4(x_train))
x_test = normalize(x_test)

In [0]:
model = ResNet18()
batches_per_epoch = len_train//BATCH_SIZE + 1

lr_schedule = lambda t: np.interp([t], [0, (EPOCHS+1)//5, EPOCHS], [0, LEARNING_RATE, 0])[0]
global_step = tf.train.get_or_create_global_step()
lr_func = lambda: lr_schedule(global_step/batches_per_epoch)/BATCH_SIZE
opt = tf.train.MomentumOptimizer(lr_func, momentum=MOMENTUM, use_nesterov=True)
data_aug = lambda x, y: (tf.image.random_flip_left_right(tf.random_crop(x, [32, 32, 3])), y)

In [35]:
t = time.time()

1579801397.2298412

In [36]:
test_set = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(BATCH_SIZE)
test_set

<DatasetV1Adapter shapes: ((?, 32, 32, 3), (?,)), types: (tf.float32, tf.int64)>

In [37]:
train_loss = test_loss = train_acc = test_acc = 0.0
print(train_loss, test_loss, train_acc,test_acc)

0.0 0.0 0.0 0.0


In [0]:
tf.keras.backend.set_learning_phase(1)

In [41]:
for (x, y) in tqdm(train_set):
    with tf.GradientTape() as tape:
      loss, correct = model(x, y)
    break


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

In [42]:
loss

<tf.Tensor: id=3517429, shape=(), dtype=float32, numpy=nan>

In [28]:

t = time.time()
test_set = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(BATCH_SIZE)

for epoch in range(EPOCHS):
  train_loss = test_loss = train_acc = test_acc = 0.0
  train_set = tf.data.Dataset.from_tensor_slices((x_train, y_train)).map(data_aug).shuffle(len_train).batch(BATCH_SIZE).prefetch(1)

  tf.keras.backend.set_learning_phase(1)
  for (x, y) in tqdm(train_set):
    with tf.GradientTape() as tape:
      loss, correct = model(x, y)

    var = model.trainable_variables
    grads = tape.gradient(loss, var)
    for g, v in zip(grads, var):
      g += v * WEIGHT_DECAY * BATCH_SIZE
    opt.apply_gradients(zip(grads, var), global_step=global_step)

    train_loss += loss.numpy()
    train_acc += correct.numpy()

  tf.keras.backend.set_learning_phase(0)
  for (x, y) in test_set:
    loss, correct = model(x, y)
    test_loss += loss.numpy()
    test_acc += correct.numpy()
    
  print('epoch:', epoch+1, 'lr:', lr_schedule(epoch+1), 'train loss:', train_loss / len_train, 'train acc:', train_acc / len_train, 'val loss:', test_loss / len_test, 'val acc:', test_acc / len_test, 'time:', time.time() - t)

  if test_acc / len_test > 90: break

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

epoch: 1 lr: 0.006666666666666667 train loss: nan train acc: nan val loss: nan val acc: 0.1 time: 67.84300446510315


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

KeyboardInterrupt: ignored

In [15]:
print(x)

tf.Tensor(
[[[[ 1.53875008e-01 -4.01850760e-01 -6.27620816e-01]
   [ 5.86266592e-02 -4.34062719e-01 -5.82646608e-01]
   [-3.69990915e-01 -6.91758573e-01 -7.47552037e-01]
   ...
   [ 6.61866188e-01  1.01547623e+00  4.36768770e-01]
   [ 6.61866188e-01  9.51052248e-01  3.61811757e-01]
   [ 3.91995877e-01  4.51766610e-01  3.20008993e-02]]

  [[-2.42993101e-01 -5.30698657e-01 -5.97638011e-01]
   [-1.79494202e-01 -3.21320802e-01 -4.02749777e-01]
   [-5.28738141e-01 -6.59546554e-01 -7.02577829e-01]
   ...
   [ 6.61866188e-01  9.67158258e-01  4.51760173e-01]
   [ 6.45991445e-01  8.38310361e-01  3.31828952e-01]
   [ 4.71369505e-01  4.03448671e-01  1.70094967e-02]]

  [[ 1.53875008e-01 -4.01850760e-01 -6.27620816e-01]
   [ 5.86266592e-02 -4.34062719e-01 -5.82646608e-01]
   [-3.69990915e-01 -6.91758573e-01 -7.47552037e-01]
   ...
   [ 6.61866188e-01  1.01547623e+00  4.36768770e-01]
   [ 6.61866188e-01  9.51052248e-01  3.61811757e-01]
   [ 3.91995877e-01  4.51766610e-01  3.20008993e-02]]

  ...

 

In [0]:
import torch
import torch.nn as nn
import torch.nn.functional as F


class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, in_planes, planes, stride=1):
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)

        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != self.expansion*planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(self.expansion*planes)
            )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out


class Bottleneck(nn.Module):
    expansion = 4

    def __init__(self, in_planes, planes, stride=1):
        super(Bottleneck, self).__init__()
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, self.expansion*planes, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(self.expansion*planes)

        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != self.expansion*planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(self.expansion*planes)
            )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = F.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out


class ResNet(nn.Module):
    def __init__(self, block, num_blocks, num_classes=10):
        super(ResNet, self).__init__()
        self.in_planes = 64

        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
        self.linear = nn.Linear(512*block.expansion, num_classes)

    def _make_layer(self, block, planes, num_blocks, stride):
        strides = [stride] + [1]*(num_blocks-1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_planes, planes, stride))
            self.in_planes = planes * block.expansion
        return nn.Sequential(*layers)

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = F.avg_pool2d(out, 4)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out


def ResNet18():
    return ResNet(BasicBlock, [2,2,2,2])

def ResNet34():
    return ResNet(BasicBlock, [3,4,6,3])

def ResNet50():
    return ResNet(Bottleneck, [3,4,6,3])

def ResNet101():
    return ResNet(Bottleneck, [3,4,23,3])

def ResNet152():
    return ResNet(Bottleneck, [3,8,36,3])


def test():
    net = ResNet18()
    y = net(torch.randn(1,3,32,32))
    print(y.size())

In [0]:
ResNet18()

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (shortcut): Sequential()
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=