##  Load packages and configure GPU

In [3]:
#!pip install tensorflow-models-official

In [1]:
import tensorflow as tf
import os, sys
import warnings
from cholect50 import dataloader_tf as dataloader
import matplotlib.pyplot as plt
import numpy as np

#os.environ['PYTHONPATH'] += ":/code/models"

#import tensorflow_models as tfm
#from tensorflow_models import vision
from keras import layers
from keras.layers import Input, Add, Dense, Activation, ZeroPadding2D, BatchNormalization, Flatten, Conv2D, AveragePooling2D, MaxPooling2D, GlobalMaxPooling2D, ConvLSTM2D, TimeDistributed
from keras.models import Model, load_model
from keras.initializers import glorot_uniform
from tensorflow.keras import regularizers

from tensorflow.keras.layers import Layer
import tensorflow_addons as tfa

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' 
warnings.filterwarnings("ignore")
%matplotlib inline


TensorFlow Addons (TFA) has ended development and introduction of new features.
TFA has entered a minimal maintenance and release mode until a planned end of life in May 2024.
Please modify downstream libraries to take dependencies from other repositories in our TensorFlow community (e.g. Keras, Keras-CV, and Keras-NLP). 

For more information see: https://github.com/tensorflow/addons/issues/2807 

 The versions of TensorFlow you are currently using is 2.9.1 and is not supported. 
Some things might work, some things might not.
If you were to encounter a bug, do not file an issue.
If you want to make sure you're using a tested and supported configuration, either change the TensorFlow version or the TensorFlow Addons's version. 
You can find the compatibility matrix in TensorFlow Addon's readme:
https://github.com/tensorflow/addons


In [70]:
gpus = tf.config.list_physical_devices('GPU')
if gpus:
  # Restrict TensorFlow to only allocate 1GB of memory on the first GPU
  try:
    tf.config.set_logical_device_configuration(
        gpus[1],
        [tf.config.LogicalDeviceConfiguration(memory_limit=1024*24)])
    logical_gpus = tf.config.list_logical_devices('GPU')
    print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
  except RuntimeError as e:
    # Virtual devices must be set before GPUs have been initialized
    print(e)

2 Physical GPUs, 2 Logical GPUs


## Load train, test, val dataset using the Dataloader 

In [44]:
BATCH_SIZE = 8

In [45]:
dataset = dataloader.CholecT50( 
          dataset_dir="CholecT50", 
          dataset_variant="cholect50",
          test_fold=1,
          augmentation_list=['original'],
          num_parallel_calls=100
          )

# build dataset
train_dataset, val_dataset, test_dataset = dataset.build()

# train and val data loaders
train_dataloader = train_dataset.batch(BATCH_SIZE) # see tf.data.Dataset for more options
val_dataloader   = val_dataset.batch(BATCH_SIZE)

# test data set is built per video, so load differently
test_dataloaders = []
for video_dataset in test_dataset:
    test_dataloader = video_dataset.batch(BATCH_SIZE)
    test_dataloaders.append(test_dataloader)  

## Inspect Dataset

In [None]:
"""
instruments = [
    "Grasper",
    "Bipolar",
    "Hook",
    "Scissors",
    "Clipper",
    "Irrigator"
]

for (img, (ivt, i, v, t, p)) in train_dataloader.take(1):
    l = [x==1 for x in i.numpy()]
    
lis = np.array(instruments)
fil = np.array(l)

fig, axs = plt.subplots(4, 2, figsize=(12, 10))

for i in range(4):
    for j in range(2):
        axs[i, j].imshow((img.numpy()[i*2+j]).astype(np.uint8))
        axs[i, j].set_title(str(lis[fil[i*2+j]]))
        axs[i, j].axis('off')

plt.subplots_adjust(wspace=0, hspace=0.2)
plt.show()
"""

## Model

In [46]:
from tensorflow.keras.layers import Layer

NUM_FRAMES = 16
BATCH_SIZE = 8

class ReshapeLayer(Layer):
    def call(self,inputs):
        nshape = (BATCH_SIZE,NUM_FRAMES) + inputs.shape[1:]
        return tf.reshape(inputs,nshape)
    
class ExpandLayer(Layer):
    def call(self,inputs):
        nshape = (BATCH_SIZE,NUM_FRAMES) + inputs.shape[1:]
        return tf.reshape(inputs,nshape)
    
def wildcat_pooling(img, alpha=0.6, name='Wildcat_Pooling'):
    # Axis: Breadth and Width of the input tensor. Assuming
    # 0 is the batch size. Check if we have a 5D Tensor
    with tf.name_scope(name):
        return tf.math.reduce_max(img, axis=[-3,-2]) + alpha*tf.math.reduce_min(img, axis=[-3,-2])

In [47]:
for (img, (ivt, i, v, t, p)) in train_dataloader.take(1):
    pass

input_shape = np.array(img[0].shape)
print(i)
print(input_shape)

tf.Tensor(
[[1. 0. 1. 0. 0. 0.]
 [1. 0. 1. 0. 0. 0.]
 [1. 0. 0. 1. 0. 0.]
 [1. 0. 0. 0. 0. 0.]
 [1. 0. 1. 0. 0. 0.]
 [1. 0. 1. 0. 0. 0.]
 [1. 0. 1. 0. 0. 0.]
 [1. 0. 1. 0. 0. 0.]], shape=(8, 6), dtype=float32)
[256 448   3]


## Temp Model

In [40]:
# Define the input as a tensor with shape input_shape
filters = [64, 128, 256, 512]
strides = [1,   2,   1,   1]

X_input = Input(shape = (256, 448, 3))


# Zero-Padding
X = ZeroPadding2D((3, 3))(X_input)


#[Note]: Using ResNet50-V1 instead 


# Stage 1
X = Conv2D(64, (7, 7), 
           strides=(2, 2), 
           name='conv1', kernel_initializer=glorot_uniform(seed=0))(X)
X = BatchNormalization(name='bnConv1')(X)
X = Activation('relu')(X)
X = MaxPooling2D((3, 3), strides=(2, 2))(X)

for i in range(len(filters)):
    if (i > 0): 
        X = vision.layers.ResidualBlock(filters[i], strides[i], use_projection=True)(X)
        X = vision.layers.ResidualBlock(filters[i], strides[i], use_projection=True)(X)
    else:
        X = vision.layers.ResidualBlock(filters[i], strides[i], use_projection=False)(X)
        X = vision.layers.ResidualBlock(filters[i], strides[i], use_projection=False)(X)
"""        

X = tf.keras.applications.resnet50.ResNet50(
    weights='imagenet',  # Load weights pre-trained on ImageNet.
    input_shape=(256, 448, 3),
    include_top=False
)(X_input)
"""

reshaped_input = ReshapeLayer()(X)

# # Return sequence True will return all the sequence of images
# # if set to false, it will only return the last image 
X = ConvLSTM2D(
     filters=6,
     kernel_size=(1, 1), 
     name='convLSTMLayer', 
     kernel_regularizer = regularizers.L2(1e-5),
     return_sequences=True)(reshaped_input)

# Wildcat Pooling goes here

# Need to move after ConvLSTM in final
X = Conv2D(filters=6, kernel_size=(1, 1), name='LocMapLayer')(X)


model = Model(inputs=X_input, outputs=X, name='WNet')

NameError: name 'vision' is not defined

In [None]:
model.summary()

In [15]:
!pip install git+https://github.com/qubvel/classification_models.git

Defaulting to user installation because normal site-packages is not writeable
Collecting git+https://github.com/qubvel/classification_models.git
  Cloning https://github.com/qubvel/classification_models.git to /tmp/pip-req-build-nf17aht7
  Running command git clone --filter=blob:none --quiet https://github.com/qubvel/classification_models.git /tmp/pip-req-build-nf17aht7
  Resolved https://github.com/qubvel/classification_models.git to commit a0f006e05485a34ccf871c421279864b0ccd220b
  Running command git submodule update --init --recursive -q
  Preparing metadata (setup.py) ... [?25ldone


In [16]:
#### WITH RESNET18 PRETRAINED
from classification_models.keras import Classifiers

ResNet18, preprocess_input = Classifiers.get('resnet18')

X_input = Input(shape = (256, 448, 3))

X = ResNet18(input_shape=(256, 448, 3), weights='imagenet', include_top=False)(X_input)


reshaped_input = ReshapeLayer()(X)

# # Return sequence True will return all the sequence of images
# # if set to false, it will only return the last image 
# X = ConvLSTM2D(
#     filters=6,
#     kernel_size=(1, 1), 
#     name='convLSTMLayer', 
#     kernel_regularizer = regularizers.L2(1e-5),
#     return_sequences=True)(reshaped_input)

X = ConvLSTM2D(
     filters=6,
     kernel_size=(1, 1), 
     name='convLSTMLayer', 
     kernel_regularizer = regularizers.L2(1e-5),
     return_sequences=True)(reshaped_input)

# Wildcat Pooling goes here

# Need to move after ConvLSTM in final
X = Conv2D(filters=6, kernel_size=(1, 1), name='LocMapLayer')(X)


model = Model(inputs=X_input, outputs=X, name='WNet')

In [17]:
model.summary()

Model: "WNet"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 256, 448, 3)]     0         
                                                                 
 model (Functional)          (None, 8, 14, 512)        11186889  
                                                                 
 reshape_layer (ReshapeLayer  (8, 16, 8, 14, 512)      0         
 )                                                               
                                                                 
 convLSTMLayer (ConvLSTM2D)  (8, 16, 8, 14, 6)         12456     
                                                                 
 LocMapLayer (Conv2D)        (8, 16, 8, 14, 6)         42        
                                                                 
Total params: 11,199,387
Trainable params: 11,191,445
Non-trainable params: 7,942
______________________________________________

In [27]:
pretrained_model_for_demo = tf.keras.applications.resnet50.ResNet50(
    include_top=False,
    weights='imagenet',
    input_shape=(256, 448, 3)
)

## Model v1

In [62]:
from classification_models.keras import Classifiers
ResNet18, preprocess_input = Classifiers.get('resnet18')
resnet = ResNet18(input_shape=(256, 448, 3), weights='imagenet', include_top=False)

X_input = Input(shape = (16, 256, 448, 3))
reshaped_input = TimeDistributed(resnet)(X_input)
X = ConvLSTM2D(
     filters=6,
     kernel_size=(1, 1), 
     name='convLSTMLayer', 
     kernel_regularizer = regularizers.L2(1e-5),
     return_state=False)(reshaped_input)

X = Conv2D(filters=6, kernel_size=(1, 1), name='LocMapLayer')(X)
X = wildcat_pooling(X)

model = Model(inputs=X_input, outputs=X, name='WNet')

model.summary()

Model: "WNet"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_11 (InputLayer)          [(None, 16, 256, 44  0           []                               
                                8, 3)]                                                            
                                                                                                  
 time_distributed_2 (TimeDistri  (None, 16, 8, 14, 5  11186889   ['input_11[0][0]']               
 buted)                         12)                                                               
                                                                                                  
 convLSTMLayer (ConvLSTM2D)     (None, 8, 14, 6)     12456       ['time_distributed_2[0][0]']     
                                                                                               

## Train model

In [63]:
from tensorflow.nn import weighted_cross_entropy_with_logits as loss_fn

In [64]:
class_weights = tf.convert_to_tensor(
    [0.08084519, 0.81435289, 0.10459284, 2.55976864, 1.630372490, 1.29528455], 
    dtype=tf.float32, dtype_hint=None, name=None
)

In [65]:
optimizers = [
    tf.keras.optimizers.Adam(learning_rate=1e-1),
    tf.keras.optimizers.Adam(learning_rate=1e-3)
]
optimizers_and_layers = [(optimizers[0], model.layers[0:2]), (optimizers[1], model.layers[2:])]
optimizer = tfa.optimizers.MultiOptimizer(optimizers_and_layers)

model.compile(
    optimizer=optimizer, 
    metrics=['accuracy']
)

In [66]:
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3), 
    metrics=['accuracy']
)

In [67]:
epochs = 2
for epoch in range(epochs):
    print("\nStart of epoch %d" % (epoch,))


    # Iterate over the batches of the dataset.
    for step, (img, (_, label_i, _, _, _)) in enumerate(train_dataloader):
               
        # Open a GradientTape to record the operations run
        # during the forward pass, which enables auto-differentiation.
        with tf.GradientTape() as tape:

            # Run the forward pass of the layer.
            # The operations that the layer applies
            # to its inputs are going to be recorded
            # on the GradientTape.
            logits = model(img, training=True)  # Logits for this minibatch
            
            #print(label_i)
            
            #label_i=tf.cast(label_i,tf.float32)
            # Compute the loss value for this minibatch.
            loss_value = loss_fn(labels=label_i, logits=logits, pos_weight=class_weights)
            #loss_value = tf.expand_dims(tf.math.reduce_mean(loss_value, axis = -1),axis = -1)
            loss_value = tf.math.reduce_mean(loss_value)
            #print(loss_value)

        # Use the gradient tape to automatically retrieve
        # the gradients of the trainable variables with respect to the loss.
        grads = tape.gradient(loss_value, model.trainable_weights)

        # Run one step of gradient descent by updating
        # the value of the variables to minimize the loss.
        optimizer.apply_gradients(zip(grads, model.trainable_weights))

        # Log every 2 batches.
        if step % 1000 == 0:
            print(
                "Training loss (for one batch) at step %d: %.4f"
                % (step, float(loss_value))
            )
            print("Seen so far: %s samples" % ((step + 1) * BATCH_SIZE))


Start of epoch 0


ValueError: Input 0 of layer "WNet" is incompatible with the layer: expected shape=(None, 16, 256, 448, 3), found shape=(8, 256, 448, 3)