In [1]:
import tensorflow_datasets as tfds

import tensorflow as tf
import sys
import random
import os
import sys
import tensorflow_addons as tfa

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
dataset_name = 'ucf101'

ucf101 = tfds.builder(dataset_name)



In [3]:
# config = tfds.download.DownloadConfig(verify_ssl=False)
# ucf101.download_and_prepare(download_config=config)

In [3]:
num_classes = ucf101.info.features['label'].num_classes
num_examples = {
    name: split.num_examples
    for name, split in ucf101.info.splits.items()
}

print('Number of classes:', num_classes)
print('Number of examples for train:', num_examples['train'])
print('Number of examples for test:', num_examples['test'])
print()

# ucf101.info

Number of classes: 101
Number of examples for train: 9537
Number of examples for test: 3783



In [4]:
# Build the training and evaluation datasets.
batch_size = 8
num_frames = 32
frame_stride = 10
resolution = 224

def format_features(features):
  # print("features", features)
  video = features['video']
  print(video.shape, "start", features)


  total_frames = video.shape[1]
  if total_frames == None:
    total_frames = 32
  frames = 32

  start_idx = random.randint(0, total_frames - frames )
  video = video[:,start_idx:start_idx+32]
  print(video.shape)
  video = tf.reshape(video, [-1, video.shape[2], video.shape[3], 3])
  print("reshape",video.shape)

  
  video = tf.image.resize(video, (224, 224))
  video = tf.reshape(video, [-1, num_frames, resolution, resolution, 3])
  video = tf.transpose(video, perm=(0,4,1,2,3))

  print(video.shape)

  # video = tf.image.random_crop(video, (-1,32,224,224,3))

  if video.shape[0] is not None:
    videos = tf.unstack(video)
    for video, i in enumerate(videos):
      isFlip = random.choice(["flip", "don't flip"])
      if isFlip == "flip":
          videos[i]= tf.image.flip_left_right(video)
    video = tf.stack(videos)
  video = tf.image.per_image_standardization(video)


  label = tf.one_hot(features['label'], num_classes)
  return (video, label)


# format_features(features) 

In [5]:
train_dataset = ucf101.as_dataset(
    split='train',
    batch_size=batch_size,
    shuffle_files=True)
train_dataset = train_dataset.map(
    format_features,
    num_parallel_calls=tf.data.AUTOTUNE)
train_dataset = train_dataset.repeat()
train_dataset = train_dataset.prefetch(2)

test_dataset = ucf101.as_dataset(
    split='test',
    batch_size=batch_size)
test_dataset = test_dataset.map(
    format_features,
    num_parallel_calls=tf.data.AUTOTUNE,
    deterministic=True)
test_dataset = test_dataset.prefetch(2)

2022-09-14 03:38:41.132139: E tensorflow/stream_executor/cuda/cuda_driver.cc:271] failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected
2022-09-14 03:38:41.132217: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (gsoc28gb): /proc/driver/nvidia/version does not exist
2022-09-14 03:38:41.147003: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


(None, None, 256, 256, 3) start {'label': <tf.Tensor 'args_0:0' shape=(None,) dtype=int64>, 'video': <tf.Tensor 'args_1:0' shape=(None, None, 256, 256, 3) dtype=uint8>}
(None, None, 256, 256, 3)
reshape (None, 256, 256, 3)
(None, 3, 32, 224, 224)
(None, None, 256, 256, 3) start {'label': <tf.Tensor 'args_0:0' shape=(None,) dtype=int64>, 'video': <tf.Tensor 'args_1:0' shape=(None, None, 256, 256, 3) dtype=uint8>}
(None, None, 256, 256, 3)
reshape (None, 256, 256, 3)
(None, 3, 32, 224, 224)


In [6]:
sys.path.append("/home/azureuser/cloudfiles/code/Users/Mohammad.Shoaib/GSOC-22-Video-Swin-Transformers")


In [8]:
# ! python "/home/azureuser/cloudfiles/code/Users/Mohammad.Shoaib/GSOC-22-Video-Swin-Transformers/convert.py" -m "swin_tiny_patch244_window877_kinetics400_1k"


In [7]:
backbone = tf.keras.models.load_model("/home/azureuser/cloudfiles/code/Users/Datasets/swin_tiny_patch244_window877_kinetics400_1k_tf")

In [8]:
from VideoSwinTransformer import model_configs, SwinTransformer3D, I3DHead_tf

cfg_method = model_configs.MODEL_MAP["swin_tiny_patch244_window877_kinetics400_1k"]
cfg = cfg_method()

name = cfg["name"]
link = cfg['link']
del cfg["name"]
del cfg['link']


No CUDA runtime is found, using CUDA_HOME='/usr/local/cuda'


In [17]:
def get_model(num_classes,cfg, shape_of_input=(10,3,32,224,224)):
    inputs = tf.keras.Input(shape_of_input[1:])
    backbone = SwinTransformer3D(**cfg, shape_of_input=shape_of_input)
    x = backbone(inputs, training= True)
    outputs = I3DHead_tf(num_classes, 768, training=True)(x)
    return tf.keras.Model(inputs, outputs)

In [18]:
shape_of_input = (batch_size, 3, 32, 224,224)
model = get_model(num_classes, cfg, shape_of_input=shape_of_input)
model.summary()

shape_of_input:  (8, 3, 32, 224, 224)
101 768 {'type': 'CrossEntropyLoss'} avg 0.5 0.01 {'training': True}
Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 3, 32, 224, 224)  0         
                             ]                                   
                                                                 
 swin_transformer3d_1 (SwinT  (8, 768, 16, 7, 7)       29694438  
 ransformer3D)                                                   
                                                                 
 i3d_head_tf_1 (I3DHead_tf)  (8, 101)                  77669     
                                                                 
Total params: 29,772,107
Trainable params: 27,928,139
Non-trainable params: 1,843,968
_________________________________________________________________


In [19]:
optimizer = tfa.optimizers.AdamW(weight_decay= 0.05,learning_rate=3e-4, beta_1= 0.9, beta_2=0.999, epsilon=1e-8)

metrics=["top_k_categorical_accuracy", "categorical_accuracy"] 
loss_obj = tf.keras.losses.CategoricalCrossentropy(
    # from_logits=True,
    label_smoothing=0.1)

model.compile(loss=loss_obj, optimizer=optimizer, metrics=metrics)



In [20]:
num_epochs = 3

train_steps = num_examples['train'] // batch_size
total_train_steps = train_steps * num_epochs
test_steps = num_examples['test'] // batch_size


# loss_obj=tf.keras.losses.SparseCategoricalCrossentropy()
callbacks = [
    tf.keras.callbacks.TensorBoard(),
]

In [21]:
results = model.fit(
    train_dataset,
    validation_data=test_dataset,
    epochs=num_epochs,
    steps_per_epoch=train_steps,
    validation_steps=test_steps,
    callbacks=callbacks,
    validation_freq=1,
    verbose=1)

Epoch 1/3
  10/1192 [..............................] - ETA: 34:51:56 - loss: 4.6814 - top_k_categorical_accuracy: 0.0125 - categorical_accuracy: 0.0000e+00

KeyboardInterrupt: 

In [None]:
shape_of_input = (8, 3, 32, 224,224)

X = tf.random.normal(shape_of_input)
y = tf.random.uniform((shape_of_input[0],1), 0, 5, tf.dtypes.int32)
# y

In [None]:
model = get_model(5, cfg, shape_of_input=shape_of_input)

model.summary()

In [None]:
model.compile(loss=tf.keras.losses.SparseCategoricalCrossentropy(), optimizer=optimizer, metrics=metrics)


In [None]:
results = model.fit(
    X,
    y,
    epochs=3,
    steps_per_epoch=train_steps,
    validation_steps=test_steps,
    callbacks=callbacks,
    verbose=1)