In [1]:
import tensorflow_datasets as tfds

import tensorflow as tf
import sys
import random
import os
import sys
import numpy as np
import tensorflow_addons as tfa

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
gpus = tf.config.list_physical_devices(device_type = 'GPU')
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)

In [4]:
dataset_name = 'ucf101'

ucf101 = tfds.builder(dataset_name)



In [5]:
# config = tfds.download.DownloadConfig(verify_ssl=False)
# ucf101.download_and_prepare(download_config=config)

In [6]:
num_classes = ucf101.info.features['label'].num_classes
num_examples = {
    name: split.num_examples
    for name, split in ucf101.info.splits.items()
}

print('Number of classes:', num_classes)
print('Number of examples for train:', num_examples['train'])
print('Number of examples for test:', num_examples['test'])
print()

# ucf101.info

Number of classes: 101
Number of examples for train: 9537
Number of examples for test: 3783



In [7]:
# Build the training and evaluation datasets.
batch_size = 2
num_frames = 32
resolution = 224

In [8]:


def format_features(features):
  # print("features", features)
  video = features['video']
  print(video.shape, "start", features)


  total_frames = video.shape[1]
  if total_frames == None:
    total_frames = num_frames
  frames = num_frames

  start_idx = random.randint(0, total_frames - frames )
  video = video[:,start_idx:start_idx+32]
  print(video.shape, video.shape[1])

  if video.shape[1] is None or video.shape[1]  < 32:
    video = tf.random.normal((batch_size, 32, 256, 256, 3))
    
  video = tf.reshape(video, [-1, video.shape[2], video.shape[3], 3])
  print("reshape",video.shape)

  
  video = tf.image.resize(video, (resolution, resolution))
  video = tf.reshape(video, [-1, num_frames, resolution, resolution, 3])
  video = tf.transpose(video, perm=(0,4,1,2,3))

  print("transpose",video.shape)
  
  # video = tf.image.random_crop(video, (-1,32,224,224,3))

  # if video.shape[0] is not None:
  #   videos = tf.unstack(video)
  #   for video, i in enumerate(videos):
  #     isFlip = random.choice(["flip", "don't flip"])
  #     if isFlip == "flip":
  #         videos[i]= tf.image.flip_left_right(video)
  #   video = tf.stack(videos)
  video = tf.image.per_image_standardization(video)

  print(video.shape)
  
  label = tf.one_hot(features['label'], num_classes)
  return (video, label)


# format_features(features) 

In [9]:
train_dataset = ucf101.as_dataset(
    split='train',
    batch_size=batch_size,
    shuffle_files=True)
train_dataset = train_dataset.map(
    format_features,
    num_parallel_calls=tf.data.AUTOTUNE)
train_dataset = train_dataset.repeat()
train_dataset = train_dataset.prefetch(2)

test_dataset = ucf101.as_dataset(
    split='test',
    batch_size=batch_size)
test_dataset = test_dataset.map(
    format_features,
    num_parallel_calls=tf.data.AUTOTUNE,
    deterministic=True)
test_dataset = test_dataset.prefetch(2)

2022-09-25 12:01:04.347007: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-09-25 12:01:05.031525: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1532] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 15401 MB memory:  -> device: 0, name: Tesla P100-PCIE-16GB, pci bus id: 0001:00:00.0, compute capability: 6.0


(None, None, 256, 256, 3) start {'label': <tf.Tensor 'args_0:0' shape=(None,) dtype=int64>, 'video': <tf.Tensor 'args_1:0' shape=(None, None, 256, 256, 3) dtype=uint8>}
(None, None, 256, 256, 3) None
reshape (64, 256, 256, 3)
transpose (2, 3, 32, 224, 224)
(2, 3, 32, 224, 224)
(None, None, 256, 256, 3) start {'label': <tf.Tensor 'args_0:0' shape=(None,) dtype=int64>, 'video': <tf.Tensor 'args_1:0' shape=(None, None, 256, 256, 3) dtype=uint8>}
(None, None, 256, 256, 3) None
reshape (64, 256, 256, 3)
transpose (2, 3, 32, 224, 224)
(2, 3, 32, 224, 224)


In [12]:
# sys.path.append("/home/azureuser/cloudfiles/code/Users/Mohammad.Shoaib/GSOC-22-Video-Swin-Transformers")

In [13]:
# ! python "/home/azureuser/cloudfiles/code/Users/Mohammad.Shoaib/GSOC-22-Video-Swin-Transformers/convert.py" -m "swin_tiny_patch244_window877_kinetics400_1k"

In [14]:
from VideoSwinTransformer import model_configs, SwinTransformer3D, I3DHead_tf

cfg_method = model_configs.MODEL_MAP["swin_tiny_patch244_window877_kinetics400_1k"]
cfg = cfg_method()

name = cfg["name"]
link = cfg['link']
del cfg["name"]
del cfg['link']


In [None]:
def get_model(num_classes,cfg,backbone, shape_of_input=(10,3,32,224,224)):
    inputs = tf.keras.Input(shape_of_input[1:])
    
    x = backbone(inputs, training= True)
    outputs = I3DHead_tf(num_classes, 768, training=True)(x)
    return tf.keras.Model(inputs, outputs)

In [None]:
shape_of_input = (batch_size, 3, num_frames, resolution,resolution)

num_epochs = 2
sample_count = num_examples['train']
warmup_epoch = 3
total_steps = int(num_epochs * sample_count / batch_size)

# Compute the number of warmup batches.
warmup_steps = int(warmup_epoch * sample_count / batch_size)

train_steps = num_examples['train'] // batch_size
total_train_steps = train_steps * num_epochs
test_steps = num_examples['test'] // batch_size


In [None]:
def cosine_decay_with_warmup(global_step,
                             learning_rate_base,
                             total_steps,
                             warmup_learning_rate=0.0,
                             warmup_steps=0,
                             hold_base_rate_steps=0):

    if total_steps < warmup_steps:
        raise ValueError('total_steps must be larger or equal to '
                         'warmup_steps.')

    if not isinstance(global_step, int):
      global_step = 1
     
    learning_rate = 0.5 * learning_rate_base * (1 + np.cos(
        np.pi *
        (global_step - warmup_steps - hold_base_rate_steps
        ) / float(total_steps - warmup_steps - hold_base_rate_steps)))


    if hold_base_rate_steps > 0:
        learning_rate = np.where(global_step > warmup_steps + hold_base_rate_steps,
                                 learning_rate, learning_rate_base)
    if warmup_steps > 0:
        if learning_rate_base < warmup_learning_rate:
            raise ValueError('learning_rate_base must be larger or equal to '
                             'warmup_learning_rate.')
        slope = (learning_rate_base - warmup_learning_rate) / warmup_steps
        warmup_rate = slope * global_step + warmup_learning_rate
        learning_rate = np.where(global_step < warmup_steps, warmup_rate,
                                 learning_rate)
        
    return np.where(global_step > total_steps, 0.0, learning_rate)


class CosineAnnealingSchedule(tf.keras.optimizers.schedules.LearningRateSchedule):
  def __init__(self, learning_rate_base,total_steps,warmup_learning_rate=0.0,warmup_steps=0,hold_base_rate_steps=0):
    super().__init__()

    
    self.learning_rate_base = learning_rate_base
    self.total_steps = total_steps
    self.warmup_learning_rate = warmup_learning_rate
    self.warmup_steps = warmup_steps
    self.hold_base_rate_steps = hold_base_rate_steps

  def __call__(self, step):
    lr = cosine_decay_with_warmup(global_step=step,
                                      learning_rate_base=self.learning_rate_base,
                                      total_steps=self.total_steps,
                                      warmup_learning_rate=self.warmup_learning_rate,
                                      warmup_steps=self.warmup_steps,
                                      hold_base_rate_steps=self.hold_base_rate_steps)
    print("lr =", lr)
    return lr

In [None]:
strategy = tf.distribute.MirroredStrategy()
with strategy.scope():

    backbone = tf.keras.models.load_model("/home/azureuser/cloudfiles/code/Users/Datasets/swin_tiny_patch244_window877_kinetics400_1k_tf")
    # backbone = SwinTransformer3D(**cfg, shape_of_input=shape_of_input)
    model = get_model(num_classes, cfg, backbone,shape_of_input=shape_of_input)

    lr_backbone = CosineAnnealingSchedule(learning_rate_base=.001,
                                        total_steps=total_steps,
                                        warmup_learning_rate=0.0,
                                        warmup_steps=warmup_steps,
                                        hold_base_rate_steps=0)
    lr_classifier = CosineAnnealingSchedule(learning_rate_base=.01,
                                            total_steps=total_steps,
                                            warmup_learning_rate=0.0,
                                            warmup_steps=warmup_steps,
                                            hold_base_rate_steps=0)

    optimizer_backbone = tfa.optimizers.AdamW(weight_decay= 0.05,learning_rate=lr_backbone, beta_1= 0.9, beta_2=0.999, epsilon=1e-8)
    optimizer_classifier = tfa.optimizers.AdamW(weight_decay= 0.05,learning_rate=lr_classifier, beta_1= 0.9, beta_2=0.999, epsilon=1e-8)



    optimizers_and_layers = [(optimizer_backbone, model.layers[1]), (optimizer_classifier, model.layers[2])]
    optimizer = tfa.optimizers.MultiOptimizer(optimizers_and_layers)

    loss_obj = tf.keras.losses.CategoricalCrossentropy(from_logits=True,label_smoothing=0.1)
    # loss_obj =  tf.keras.losses.SparseCategoricalCrossentropy()

    metrics=["top_k_categorical_accuracy"] 

model.compile(loss=loss_obj, optimizer=optimizer, metrics=metrics)



In [None]:
# strategy = tf.distribute.MirroredStrategy()
# with strategy.scope():

#     backbone = tf.keras.models.load_model("/home/azureuser/cloudfiles/code/Users/Datasets/swin_tiny_patch244_window877_kinetics400_1k_tf")
#     # backbone = SwinTransformer3D(**cfg, shape_of_input=shape_of_input)
#     model = get_model(num_classes, cfg, backbone,shape_of_input=shape_of_input)

#     optimizer = tfa.optimizers.AdamW(weight_decay= 0.05,learning_rate=3e-4, beta_1= 0.9, beta_2=0.999, epsilon=1e-8)
#     metrics=["top_k_categorical_accuracy"] 
#     loss_obj = tf.keras.losses.CategoricalCrossentropy(
#     from_logits=True,
#     label_smoothing=0.1,
#     reduction=tf.keras.losses.Reduction.NONE)
#     # loss_obj=tf.keras.losses.SparseCategoricalCrossentropy()

# model.compile(loss=loss_obj, optimizer=optimizer, metrics=metrics)



INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0',)


INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0',)


In [None]:
os.environ['TF_GPU_ALLOCATOR'] = 'cuda_malloc_async'

print(os.getenv('TF_GPU_ALLOCATOR'))

In [None]:
results = model.fit(
    train_dataset,
    validation_data=test_dataset,
    epochs=num_epochs,
    steps_per_epoch=train_steps,
    validation_steps=test_steps,
    callbacks=[],
    validation_freq=1,
    verbose=1)

In [None]:
a

In [None]:
shape_of_input = (1, 3, 32, 224,224)

X = tf.random.normal(shape_of_input)
y = tf.random.uniform((shape_of_input[0],1), 0, 5, tf.dtypes.int32)
# y

In [None]:
model = get_model(5, cfg, shape_of_input=shape_of_input)

model.summary()

In [None]:
model.compile(loss=loss_obj, optimizer=optimizer, metrics=metrics)


In [None]:
results = model.fit(
    X,
    y,
    epochs=3,
    steps_per_epoch=train_steps,
    validation_steps=test_steps,
    callbacks=callbacks,
    verbose=1)

In [None]:
model.predict(X)