##  Load packages and configure GPU

In [4]:
!pip3 install tensorflow-models-official

Defaulting to user installation because normal site-packages is not writeable
[31mERROR: Could not find a version that satisfies the requirement tensorflow-models-official (from versions: none)[0m[31m
[0m[31mERROR: No matching distribution found for tensorflow-models-official[0m[31m
[0m

In [2]:
import tensorflow as tf
import os, sys
import warnings
from cholect50 import dataloader_tf as dataloader
import matplotlib.pyplot as plt
import numpy as np

os.environ['PYTHONPATH'] += ":/code/models"

import tensorflow_models as tfm
from tensorflow_models import vision
from keras import layers
from keras.layers import Input, Add, Dense, Activation, ZeroPadding2D, BatchNormalization, Flatten, Conv2D, AveragePooling2D, MaxPooling2D, GlobalMaxPooling2D, ConvLSTM2D
from keras.models import Model, load_model
from keras.initializers import glorot_uniform
from tensorflow.keras import regularizers

from tensorflow.keras.layers import Layer
import tensorflow_addons as tfa

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' 
warnings.filterwarnings("ignore")
%matplotlib inline

KeyError: 'PYTHONPATH'

In [None]:
gpus = tf.config.list_physical_devices('GPU')
if gpus:
  # Restrict TensorFlow to only allocate 1GB of memory on the first GPU
  try:
    tf.config.set_logical_device_configuration(
        gpus[1],
        [tf.config.LogicalDeviceConfiguration(memory_limit=1024*24)])
    logical_gpus = tf.config.list_logical_devices('GPU')
    print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
  except RuntimeError as e:
    # Virtual devices must be set before GPUs have been initialized
    print(e)

## Load train, test, val dataset using the Dataloader 

In [None]:
BATCH_SIZE = 8

In [None]:
dataset = dataloader.CholecT50( 
          dataset_dir="CholecT50", 
          dataset_variant="cholect50",
          test_fold=1,
          augmentation_list=['original'],
          num_parallel_calls=100
          )

# build dataset
train_dataset, val_dataset, test_dataset = dataset.build()

# train and val data loaders
train_dataloader = train_dataset.batch(BATCH_SIZE) # see tf.data.Dataset for more options
val_dataloader   = val_dataset.batch(BATCH_SIZE)

# test data set is built per video, so load differently
test_dataloaders = []
for video_dataset in test_dataset:
    test_dataloader = video_dataset.batch(BATCH_SIZE)
    test_dataloaders.append(test_dataloader)  

## Inspect Dataset

In [None]:
"""
instruments = [
    "Grasper",
    "Bipolar",
    "Hook",
    "Scissors",
    "Clipper",
    "Irrigator"
]

for (img, (ivt, i, v, t, p)) in train_dataloader.take(1):
    l = [x==1 for x in i.numpy()]
    
lis = np.array(instruments)
fil = np.array(l)

fig, axs = plt.subplots(4, 2, figsize=(12, 10))

for i in range(4):
    for j in range(2):
        axs[i, j].imshow((img.numpy()[i*2+j]).astype(np.uint8))
        axs[i, j].set_title(str(lis[fil[i*2+j]]))
        axs[i, j].axis('off')

plt.subplots_adjust(wspace=0, hspace=0.2)
plt.show()
"""

## Model

In [None]:
# https://stackoverflow.com/questions/63860100/reshape-a-4d-tensor-output-of-a-convolutional-layer-to-5d-tensor-to-be-fed-to-a

NUM_FRAMES = 16
class ReshapeLayer(Layer):
    def call(self,inputs):
        nshape = (BATCH_SIZE, NUM_FRAMES) + inputs.shape[1:]
        return tf.reshape(inputs,nshape)

In [None]:
for (img, (ivt, i, v, t, p)) in train_dataloader.take(1):
    pass

input_shape = np.array(img[0].shape)

print(input_shape)

In [None]:
# Define the input as a tensor with shape input_shape
filters = [64, 128, 256, 512]
strides = [1,   2,   1,   1]

X_input = Input(shape = (256, 448, 3))


# Zero-Padding
X = ZeroPadding2D((3, 3))(X_input)


#[Note]: Using ResNet50-V1 instead 


# Stage 1
X = Conv2D(64, (7, 7), 
           strides=(2, 2), 
           name='conv1', kernel_initializer=glorot_uniform(seed=0))(X)
X = BatchNormalization(name='bnConv1')(X)
X = Activation('relu')(X)
X = MaxPooling2D((3, 3), strides=(2, 2))(X)

for i in range(len(filters)):
    if (i > 0): 
        X = vision.layers.ResidualBlock(filters[i], strides[i], use_projection=True)(X)
        X = vision.layers.ResidualBlock(filters[i], strides[i], use_projection=True)(X)
    else:
        X = vision.layers.ResidualBlock(filters[i], strides[i], use_projection=False)(X)
        X = vision.layers.ResidualBlock(filters[i], strides[i], use_projection=False)(X)
"""        

X = tf.keras.applications.resnet50.ResNet50(
    weights='imagenet',  # Load weights pre-trained on ImageNet.
    input_shape=(256, 448, 3),
    include_top=False
)(X_input)
"""

reshaped_input = ReshapeLayer()(X)

# # Return sequence True will return all the sequence of images
# # if set to false, it will only return the last image 
X = ConvLSTM2D(
     filters=6,
     kernel_size=(1, 1), 
     name='convLSTMLayer', 
     kernel_regularizer = regularizers.L2(1e-5),
     return_sequences=True)(reshaped_input)

# Wildcat Pooling goes here

# Need to move after ConvLSTM in final
X = Conv2D(filters=6, kernel_size=(1, 1), name='LocMapLayer')(X)


model = Model(inputs=X_input, outputs=X, name='WNet')

In [None]:
model.summary()

In [None]:
!pip install git+https://github.com/qubvel/classification_models.git

In [None]:
#### WITH RESNET18 PRETRAINED
from classification_models.keras import Classifiers

ResNet18, preprocess_input = Classifiers.get('resnet18')

X_input = Input(shape = (256, 448, 3))

X = ResNet18(input_shape=(256, 448, 3), weights='imagenet', include_top=False)(X_input)


# reshaped_input = ReshapeLayer()(X)

# # Return sequence True will return all the sequence of images
# # if set to false, it will only return the last image 
# X = ConvLSTM2D(
#     filters=6,
#     kernel_size=(1, 1), 
#     name='convLSTMLayer', 
#     kernel_regularizer = regularizers.L2(1e-5),
#     return_sequences=True)(reshaped_input)

# Wildcat Pooling goes here

# Need to move after ConvLSTM in final
X = Conv2D(filters=6, kernel_size=(1, 1), name='LocMapLayer')(X)


model = Model(inputs=X_input, outputs=X, name='WNet')

In [None]:
model.summary()

## Train model

In [None]:
from tensorflow.nn import weighted_cross_entropy_with_logits as loss_fn

In [None]:
class_weights = [0.08084519, 0.81435289, 0.10459284, 2.55976864, 1.630372490, 1.29528455]

In [None]:
optimizers = [
    tf.keras.optimizers.Adam(learning_rate=1e-1),
    tf.keras.optimizers.Adam(learning_rate=1e-3)
]
optimizers_and_layers = [(optimizers[0], model.layers[0:2]), (optimizers[1], model.layers[2:])]
optimizer = tfa.optimizers.MultiOptimizer(optimizers_and_layers)

In [None]:
model.compile(
    optimizer=optimizer, 
    metrics=['accuracy']
)

In [None]:
epochs = 2
for epoch in range(epochs):
    print("\nStart of epoch %d" % (epoch,))


    # Iterate over the batches of the dataset.
    for iteration, (img, (_, label_i, _, _, _)) in enumerate(train_dataloader):
               
        # Open a GradientTape to record the operations run
        # during the forward pass, which enables auto-differentiation.
        with tf.GradientTape() as tape:

            # Run the forward pass of the layer.
            # The operations that the layer applies
            # to its inputs are going to be recorded
            # on the GradientTape.
            logits = model(img, training=True)  # Logits for this minibatch

            # Compute the loss value for this minibatch.
            loss_value = loss_fn(labels=label_i, logits=logits, pos_weight=class_weights)

        # Use the gradient tape to automatically retrieve
        # the gradients of the trainable variables with respect to the loss.
        grads = tape.gradient(loss_value, model.trainable_weights)

        # Run one step of gradient descent by updating
        # the value of the variables to minimize the loss.
        optimizer.apply_gradients(zip(grads, model.trainable_weights))

        # Log every 200 batches.
        if step % 200 == 0:
            print(
                "Training loss (for one batch) at step %d: %.4f"
                % (step, float(loss_value))
            )
            print("Seen so far: %s samples" % ((step + 1) * batch_size))

In [None]:
for batch, (img, (_, label_i, _, _, _)) in enumerate(train_dataloader):
    print(img.shape)