In [None]:
import tensorflow as tf
from tensorflow import keras
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.applications import ResNet50

In [None]:
# Load the CIFAR-10 training and test splits as (images, labels) pairs.
(x_train, y_train), (x_test, y_test) = keras.datasets.cifar10.load_data()

In [None]:
# Inspect the shape of the training-label array.
y_train.shape

(50000, 1)

In [None]:
# Reference network: the stock Keras ResNet50 with ImageNet weights and its
# 1000-class classification head. Every other constructor argument is left at
# its default (input_tensor=None, input_shape=None, pooling=None).
model1 = ResNet50(weights='imagenet', include_top=True, classes=1000)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels.h5


In [None]:
# Pretty-print the reference model's architecture (its JSON config) so the
# layer layout can be inspected.
import json
from pprint import pprint
json_string = model1.to_json()          # architecture serialized as a JSON string
json_object = json.loads(json_string)   # parsed into nested dicts/lists
pprint(json_object)

{'backend': 'tensorflow',
 'class_name': 'Functional',
 'config': {'input_layers': [['input_1', 0, 0]],
            'layers': [{'class_name': 'InputLayer',
                        'config': {'batch_input_shape': [None, 224, 224, 3],
                                   'dtype': 'float32',
                                   'name': 'input_1',
                                   'ragged': False,
                                   'sparse': False},
                        'inbound_nodes': [],
                        'name': 'input_1'},
                       {'class_name': 'ZeroPadding2D',
                        'config': {'data_format': 'channels_last',
                                   'dtype': 'float32',
                                   'name': 'conv1_pad',
                                   'padding': [[3, 3], [3, 3]],
                                   'trainable': True},
                        'inbound_nodes': [[['input_1', 0, 0, {}]]],
                        'name': 'conv1_pad'}

In [None]:
class ResidualBlock(keras.layers.Layer):
  """Bottleneck residual block with a projection shortcut.

  Main path:  conv -> BN -> ReLU -> conv -> BN -> ReLU -> conv -> BN
  Shortcut:   1x1 conv -> BN (always a projection, even when the stride is 1)
  Output:     ReLU(main + shortcut)

  ReLU is applied after batch normalization (conv -> BN -> ReLU), i.e. the
  convolutions themselves are linear.

  Args:
    filters: sequence of three ints, output channels of the three convolutions.
    kernel_size: sequence of three ints, square kernel size of each convolution.
    stride: sequence of three ints, stride of each convolution.
    **kwargs: forwarded to `keras.layers.Layer`.
  """

  def __init__(self, filters, kernel_size, stride, **kwargs):
    super().__init__(**kwargs)
    # Keep the hyper-parameters as plain Python ints. Layer constructors
    # expect ints; tf.unstack would turn every value into a tensor.
    self._filters = tuple(int(f) for f in filters)
    self._kernel_size = tuple(int(k) for k in kernel_size)
    self._stride = tuple(int(s) for s in stride)
    self.filter1, self.filter2, self.filter3 = self._filters
    ks1, ks2, ks3 = self._kernel_size
    s1, s2, s3 = self._stride

    # No activation inside the convolutions: ReLU is applied after the
    # batch normalization in call().
    self.conv1 = keras.layers.Conv2D(self.filter1, ks1, strides=s1, padding='same')
    self.bn1 = keras.layers.BatchNormalization()
    self.conv2 = keras.layers.Conv2D(self.filter2, ks2, strides=s2, padding='same')
    self.bn2 = keras.layers.BatchNormalization()
    self.conv3 = keras.layers.Conv2D(self.filter3, ks3, strides=s3, padding='same')
    self.bn3 = keras.layers.BatchNormalization()

    # Projection shortcut: 1x1 convolution + BN. Its stride is the product of
    # the main-path strides so both paths end with the same spatial size
    # (with 'same' padding, striding by a then b shrinks like striding by a*b).
    self.skip_layer = [
        keras.layers.Conv2D(self.filter3, 1, strides=s1 * s2 * s3, padding='same'),
        keras.layers.BatchNormalization(),
    ]
    self.activ = keras.layers.Activation("relu")

  def call(self, x, training=None):
    """Apply the block to `x`; `training` is forwarded to the BatchNormalization layers."""
    z = self.activ(self.bn1(self.conv1(x), training=training))
    z = self.activ(self.bn2(self.conv2(z), training=training))
    z = self.bn3(self.conv3(z), training=training)

    skip_conv, skip_bn = self.skip_layer
    skip_z = skip_bn(skip_conv(x), training=training)

    return self.activ(z + skip_z)

  def get_config(self):
    """Return the constructor arguments so the layer can be serialized."""
    config = super().get_config()
    config.update({
        "filters": list(self._filters),
        "kernel_size": list(self._kernel_size),
        "stride": list(self._stride),
    })
    return config

In [None]:
class ResNet50Model(keras.models.Model):
  """ResNet50-style image classifier built from `ResidualBlock` bottlenecks.

  Layout: 7x7 stride-2 convolution -> BN -> ReLU -> 3x3 stride-2 max-pool,
  then four stages of 3, 4, 6 and 3 residual blocks, global average pooling
  and a 10-way softmax. The first block of stages 2-4 uses stride 2 in its
  first convolution. Every residual block is followed by one extra
  BatchNormalization layer.

  The blocks and their trailing BatchNormalization layers are exposed as
  attributes `stage<S>_<I>` and `bn<S>_<I>` (e.g. `stage2_1`, `bn2_1`).
  """

  # Per stage: (filters of the three convolutions, number of blocks,
  # stride of the first convolution in the stage's first block).
  _STAGES = (
      ((64, 64, 256), 3, 1),
      ((128, 128, 512), 4, 2),
      ((256, 256, 1024), 6, 2),
      ((512, 512, 2048), 3, 2),
  )
  # Kernel sizes of the three convolutions inside every block.
  _KERNEL_SIZES = (1, 3, 1)

  def __init__(self, **kwargs):
    super().__init__(**kwargs)
    # Stem. The convolution is linear; ReLU is applied after BN in call().
    self.conv1 = keras.layers.Conv2D(64, kernel_size=(7, 7), strides=(2, 2), padding="same")
    self.bn1 = keras.layers.BatchNormalization()
    self.maxpool = keras.layers.MaxPool2D(pool_size=(3, 3), strides=(2, 2), padding='same')

    # Build the stages in order. Layers are attached with setattr so they are
    # tracked by Keras under the attribute names stage<S>_<I> / bn<S>_<I>.
    block_names = []
    for stage, (filters, n_blocks, first_stride) in enumerate(self._STAGES, start=1):
      for index in range(1, n_blocks + 1):
        # Only the first block of a stage may downsample.
        stride = [first_stride if index == 1 else 1, 1, 1]
        block_name = "stage{}_{}".format(stage, index)
        bn_name = "bn{}_{}".format(stage, index)
        setattr(self, block_name,
                ResidualBlock(list(filters), list(self._KERNEL_SIZES), stride))
        setattr(self, bn_name, keras.layers.BatchNormalization())
        block_names.append((block_name, bn_name))
    # Execution order of the (block, batch-norm) attribute pairs used by call().
    self._block_names = tuple(block_names)

    # Head. GlobalAveragePooling2D already yields a (batch, channels) tensor,
    # so no Flatten layer is needed before the classifier.
    self.globalavgpool = keras.layers.GlobalAveragePooling2D()
    self.dense = keras.layers.Dense(10, activation='softmax')

  def call(self, x, training=None):
    """Return class probabilities for a batch of images.

    Args:
      x: batch of input images (channels-last).
      training: forwarded to the BatchNormalization layers and residual blocks.
    """
    Z = self.conv1(x)
    Z = self.bn1(Z, training=training)
    Z = tf.nn.relu(Z)
    Z = self.maxpool(Z)

    for block_name, bn_name in self._block_names:
      Z = getattr(self, block_name)(Z, training=training)
      Z = getattr(self, bn_name)(Z, training=training)

    Z = self.globalavgpool(Z)
    return self.dense(Z)
  

In [None]:
# Instantiate the custom ResNet50Model defined in the previous cell.
model = ResNet50Model()

In [None]:
# Build the model for a single 224x224 RGB image (batch, height, width, channels).
model.build(input_shape=(1, 224, 224, 3))

In [None]:
# Print the layer-by-layer summary (layer names and parameter counts).
model.summary()

Model: "res_net50model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              multiple                  9472      
_________________________________________________________________
batch_normalization (BatchNo multiple                  256       
_________________________________________________________________
max_pooling2d (MaxPooling2D) multiple                  0         
_________________________________________________________________
residual_block (ResidualBloc multiple                  76928     
_________________________________________________________________
batch_normalization_5 (Batch multiple                  1024      
_________________________________________________________________
residual_block_1 (ResidualBl multiple                  138368    
_________________________________________________________________
batch_normalization_10 (Batc multiple               

In [None]:
def print_status_bar(iteration, total, loss, metrics=None):
  """Print a single-line progress report that overwrites itself.

  The line starts with a carriage return so successive calls redraw the same
  line; a newline is emitted only once `iteration` reaches `total`.

  Args:
    iteration: progress so far (e.g. number of samples processed).
    total: value of `iteration` at which the bar is complete.
    loss: metric-like object exposing `.name` and `.result()`.
    metrics: optional metric-like object, or a list/tuple of them.
  """
  if metrics is None:
    metrics = []
  elif not isinstance(metrics, (list, tuple)):
    # A single metric object was passed instead of a list.
    metrics = [metrics]
  # float() lets both plain numbers and scalar tensors be formatted.
  status = " - ".join(
      "{}: {:.4f}".format(m.name, float(m.result())) for m in [loss] + list(metrics))
  end = "" if iteration < total else "\n"
  print("\r{}/{} - ".format(iteration, total) + status, end=end)

In [None]:
n_epochs = 1
batch_size = 32
# Number of batches per epoch, rounded up so a final partial batch is counted.
# Integer arithmetic keeps n_steps an int, as required by range(1, n_steps+1).
n_steps = (len(x_train) + batch_size - 1) // batch_size

In [None]:
# Scale pixel intensities by 1/255. Only integer-typed arrays are scaled, so
# re-running this cell after the data has already become floating point does
# not divide a second time.
if np.issubdtype(x_train.dtype, np.integer):
  x_train = x_train/255.0
if np.issubdtype(x_test.dtype, np.integer):
  x_test = x_test/255.0

In [None]:
# Cast images to float32 and labels to int32.
x_train, x_test = x_train.astype(np.float32), x_test.astype(np.float32)
y_train, y_test = y_train.astype(np.int32), y_test.astype(np.int32)

In [None]:
# Training pipeline: slice the arrays per example, batch, then prefetch.
train_ds = tf.data.Dataset.from_tensor_slices((x_train, y_train))
train_ds = train_ds.batch(batch_size)
train_ds = train_ds.prefetch(tf.data.experimental.AUTOTUNE)

# Test pipeline: identical steps applied to the held-out split.
test_ds = tf.data.Dataset.from_tensor_slices((x_test, y_test))
test_ds = test_ds.batch(batch_size)
test_ds = test_ds.prefetch(tf.data.experimental.AUTOTUNE)

In [None]:
# train_ds = train_ds.map(lambda x, y: tf.cast(x, tf.float32), y)
# train_ds.take(1)


In [None]:
# `learning_rate` is the supported keyword; `lr` is a deprecated alias.
optimizer = keras.optimizers.Nadam(learning_rate=0.01)
# Labels are integer class ids (not one-hot), hence the sparse loss.
loss_fn = keras.losses.SparseCategoricalCrossentropy()

In [None]:
# Two independent accuracy trackers: `metrics` for training batches and
# `metric_test` for the evaluation pass.
metrics, metric_test = (keras.metrics.SparseCategoricalAccuracy() for _ in range(2))

In [None]:
# Train for n_epochs, then evaluate on the test set after every epoch.
# NOTE: `training_step` is defined in a later cell of this notebook; that cell
# has to be executed before this one.
for epoch in range(1, n_epochs+1):
  print("Epoch {}/{}".format(epoch, n_epochs))

  # --- training pass ---
  for step,(X_batch, y_batch) in enumerate(train_ds):
    loss = training_step(X_batch, y_batch)
    if(step%100==0):
      # metrics.result() is the running accuracy since the start of the epoch.
      print('epoch: {} step: {} loss: {}  accuracy: {}'.format(epoch, step, loss, metrics.result()), end='\n')
  metrics.reset_states()

  # --- evaluation pass ---
  # Average the loss over the whole test set; each batch is weighted by its
  # size so a smaller final batch does not skew the mean.
  val_loss = keras.metrics.Mean()
  for _step,(_X_batch, _y_batch) in enumerate(test_ds):
    _y_pred = model(_X_batch, training=False)
    val_loss.update_state(loss_fn(_y_batch, _y_pred), sample_weight=_y_batch.shape[0])
    # Score the test metric on the test batch and its predictions.
    metric_test.update_state(_y_batch, _y_pred)
  print('epoch: {} step_val: {} loss_val: {}  accuracy_val: {}'.format(epoch, _step, val_loss.result(), metric_test.result()), end='\n')
  metric_test.reset_states()

Epoch 1/1
epoch: 1 step: 0 loss: 30.999771118164062  accuracy: 0.15625
epoch: 1 step: 100 loss: 24.512500762939453  accuracy: 0.09158416092395782
epoch: 1 step: 200 loss: 25.86250114440918  accuracy: 0.09375
epoch: 1 step: 300 loss: 26.78750228881836  accuracy: 0.09385382384061813
epoch: 1 step: 400 loss: 23.69998550415039  accuracy: 0.09445136785507202
epoch: 1 step: 500 loss: 22.924999237060547  accuracy: 0.09406187385320663
epoch: 1 step: 600 loss: 20.825000762939453  accuracy: 0.09463394433259964
epoch: 1 step: 700 loss: 26.350000381469727  accuracy: 0.09539943188428879
epoch: 1 step: 800 loss: 24.78750228881836  accuracy: 0.09531054645776749
epoch: 1 step: 900 loss: 18.875  accuracy: 0.09558823704719543
epoch: 1 step: 1000 loss: 22.299999237060547  accuracy: 0.09577921777963638
epoch: 1 step: 1100 loss: 30.962501525878906  accuracy: 0.09576521068811417
epoch: 1 step: 1200 loss: 38.05000305175781  accuracy: 0.09629996120929718
epoch: 1 step: 1300 loss: 30.96165657043457  accuracy: 

InvalidArgumentError: ignored

In [None]:
@tf.function
def training_step(x, y):
  """Run one optimization step on a batch and return its scalar loss.

  Relies on the notebook-level `model`, `loss_fn`, `optimizer` and `metrics`
  objects. Besides updating the model weights, it updates the `metrics`
  accuracy tracker with this batch's predictions.
  """
  with tf.GradientTape() as recorder:
    predictions = model(x, training=True)
    batch_loss = tf.reduce_mean(loss_fn(y, predictions))
  # Read the variable list after the forward pass, as the gradients are
  # taken with respect to exactly these variables.
  weights = model.trainable_variables
  grads = recorder.gradient(batch_loss, weights)
  optimizer.apply_gradients(zip(grads, weights))
  metrics.update_state(y, predictions)
  return batch_loss

The training loop below is still a work in progress. I am looking for a way to obtain the ImageNet dataset; otherwise, some other readily available dataset will have to be used.

In [None]:
# Eager custom training loop with a running mean loss and a one-line status bar.
mean_loss = keras.metrics.Mean(name="mean_loss")
# `metrics` may be a single metric object or a list of them; normalize to a list.
tracked_metrics = list(metrics) if isinstance(metrics, (list, tuple)) else [metrics]
n_samples = len(y_train)

for epoch in range(1, n_epochs+1):
  print("Epoch {}/{}".format(epoch, n_epochs))
  # Batches come from the tf.data pipeline; steps are counted from 1.
  for step, (X_batch, y_batch) in enumerate(train_ds, start=1):
    with tf.GradientTape() as tape:
      y_pred = model(X_batch, training=True)
      loss = tf.reduce_mean(loss_fn(y_batch, y_pred))
    gradients = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))
    mean_loss(loss)
    for metric in tracked_metrics:
      metric(y_batch, y_pred)

    # Status line: "\r" redraws it in place; the newline is printed only once
    # all samples of the epoch have been seen (the last batch may be partial).
    seen = min(step*batch_size, n_samples)
    status = " - ".join(
        "{}: {:.4f}".format(m.name, float(m.result())) for m in [mean_loss] + tracked_metrics)
    print("\r{}/{} - ".format(seen, n_samples) + status, end="" if seen < n_samples else "\n")

  # Start every epoch with fresh running statistics.
  for metric in [mean_loss] + tracked_metrics:
    metric.reset_states()

@tf.custom_gradient

In [None]:
@tf.custom_gradient
def log_of_exp(x):
  """Compute log(1 + exp(x)) with a hand-written gradient.

  Returns the pair (value, grad_fn) expected by `tf.custom_gradient`, where
  grad_fn maps the upstream gradient `dy` to `dy * d/dx log(1 + exp(x))`.
  """
  def grad(dy):
    print(dy)# dy = gradient flowing in from the later operations (chain rule)
    # d/dx log(1 + exp(x)) = 1 - 1/(1 + exp(x)) = sigmoid(x)
    return dy*tf.sigmoid(x)
  # relu(x) + log1p(exp(-|x|)) is algebraically log(1 + exp(x)), but it only
  # ever exponentiates a non-positive number, so large x cannot overflow to inf.
  return tf.nn.relu(x) + tf.math.log1p(tf.exp(-tf.abs(x))), grad

In [None]:
def der_log_exp(x):
  """Analytic derivative of log(1 + exp(x)): 1 - 1/(1 + exp(x))."""
  one_plus_exp = 1+tf.exp(x)
  return 1-(1/one_plus_exp)

In [None]:
# Differentiate through the custom-gradient op:
#   x -> y = x^2 + 3 -> z = log_of_exp(y) -> z2 = z^2
x = tf.Variable(3.0)
with tf.GradientTape() as tape:
  y = x**2 + 3
  z = log_of_exp(y)
  z2 = z**2
# d(z2)/dx; log_of_exp's grad function also prints its upstream gradient.
grad = tape.gradient(target=z2, sources=x)
print(grad)

tf.Tensor(24.000011, shape=(), dtype=float32)
tf.Tensor(143.99919, shape=(), dtype=float32)


In [None]:
# 2 * log_of_exp(12): the value 2*z at y = 12 from the demo above.
2 * log_of_exp(12.0)

<tf.Tensor: shape=(), dtype=float32, numpy=24.000011>

In [None]:
# Gradients w.r.t. a variable, an explicitly watched constant, and a variable
# whose contribution is wrapped in tf.stop_gradient.
x = tf.Variable(3.0, name='x')
x1 = tf.constant(4.0, name='x1')
x2 = tf.Variable(2.0, name='x2')
with tf.GradientTape() as tape:
  tape.watch(x1)  # x1 is a constant, so it is watched explicitly here
  y = x**2 + 3*x1 + tf.stop_gradient(x2**3)
grad = tape.gradient(target=y, sources=[x, x1, x2])
print(grad)

[<tf.Tensor: shape=(), dtype=float32, numpy=6.0>, <tf.Tensor: shape=(), dtype=float32, numpy=3.0>, None]
