In [None]:

import tensorflow as tf
import sys
import random
import os
import sys
import tensorflow_addons as tfa


In [20]:
import tensorflow_datasets as tfds

dataset_name = 'ucf101'
ucf101 = tfds.builder(dataset_name)
config = tfds.download.DownloadConfig(verify_ssl=False)
# ucf101.download_and_prepare(download_config=config)

In [23]:
num_classes = ucf101.info.features['label'].num_classes
num_examples = {
    name: split.num_examples
    for name, split in ucf101.info.splits.items()
}

print('Number of classes:', num_classes)
# print('Number of examples for train:', num_examples['train'])
# print('Number of examples for test:', num_examples['test'])
# print()

ucf101.info


Number of classes: 101


tfds.core.DatasetInfo(
    name='ucf101',
    full_name='ucf101/ucf101_1_256/2.0.0',
    description="""
    A 101-label video classification dataset.
    """,
    config_description="""
    256x256 UCF with the first action recognition split.
    """,
    homepage='https://www.crcv.ucf.edu/data-sets/ucf101/',
    data_path='~\\tensorflow_datasets\\ucf101\\ucf101_1_256\\2.0.0',
    file_format=tfrecord,
    download_size=Unknown size,
    dataset_size=Unknown size,
    features=FeaturesDict({
        'label': ClassLabel(shape=(), dtype=tf.int64, num_classes=101),
        'video': Video(Image(shape=(256, 256, 3), dtype=tf.uint8)),
    }),
    supervised_keys=None,
    disable_shuffling=False,
    splits={
    },
    citation="""@article{DBLP:journals/corr/abs-1212-0402,
      author    = {Khurram Soomro and
                   Amir Roshan Zamir and
                   Mubarak Shah},
      title     = {{UCF101:} {A} Dataset of 101 Human Actions Classes From Videos in
                   The

In [24]:
# Build the training and evaluation datasets.
batch_size = 8
num_frames = 32
resolution = 224

In [None]:


def format_features(features):
  # print("features", features)
  video = features['video']
  print(video.shape, "start", features)


  total_frames = video.shape[1]
  if total_frames == None:
    total_frames = 32
  frames = 32

  start_idx = random.randint(0, total_frames - frames )
  video = video[:,start_idx:start_idx+32]
  print(video.shape)
  video = tf.reshape(video, [-1, video.shape[2], video.shape[3], 3])
  print("reshape",video.shape)

  
  video = tf.image.resize(video, (224, 224))
  video = tf.reshape(video, [-1, num_frames, resolution, resolution, 3])
  video = tf.transpose(video, perm=(0,4,1,2,3))

  print(video.shape)

  # video = tf.image.random_crop(video, (-1,32,224,224,3))

  if video.shape[0] is not None:
    videos = tf.unstack(video)
    for video, i in enumerate(videos):
      isFlip = random.choice(["flip", "don't flip"])
      if isFlip == "flip":
          videos[i]= tf.image.flip_left_right(video)
    video = tf.stack(videos)
  video = tf.image.per_image_standardization(video)


  label = tf.one_hot(features['label'], num_classes)
  return (video, label)


# format_features(features) 

In [None]:
train_dataset = ucf101.as_dataset(
    split='train',
    batch_size=batch_size,
    shuffle_files=True)
train_dataset = train_dataset.map(
    format_features,
    num_parallel_calls=tf.data.AUTOTUNE)
train_dataset = train_dataset.repeat()
train_dataset = train_dataset.prefetch(2)

test_dataset = ucf101.as_dataset(
    split='test',
    batch_size=batch_size)
test_dataset = test_dataset.map(
    format_features,
    num_parallel_calls=tf.data.AUTOTUNE,
    deterministic=True)
test_dataset = test_dataset.prefetch(2)

In [10]:
sys.path.append("GSOC-22-Video-Swin-Transformers")


In [None]:
# ! python "GSOC-22-Video-Swin-Transformers/convert.py" -m "swin_tiny_patch244_window877_kinetics400_1k"


In [8]:
backbone = tf.keras.models.load_model("GSOC-22-Video-Swin-Transformers/swin_tiny_patch244_window877_kinetics400_1k_tf")

In [11]:
from VideoSwinTransformer import model_configs, SwinTransformer3D, I3DHead_tf

cfg_method = model_configs.MODEL_MAP["swin_tiny_patch244_window877_kinetics400_1k"]
cfg = cfg_method()

name = cfg["name"]
link = cfg['link']
del cfg["name"]
del cfg['link']


In [25]:
def get_model(num_classes,cfg, shape_of_input=(10,3,32,224,224)):
    inputs = tf.keras.Input(shape_of_input[1:])
    # backbone = SwinTransformer3D(**cfg, shape_of_input=shape_of_input)
    x = backbone(inputs, training= True)
    outputs = I3DHead_tf(num_classes, 768, training=True)(x)
    return tf.keras.Model(inputs, outputs)

In [26]:
shape_of_input = (batch_size, 3, 32, 224,224)
model = get_model(num_classes, cfg, shape_of_input=shape_of_input)
model.summary()

101 768 {'type': 'CrossEntropyLoss'} avg 0.5 0.01 {'training': True}
Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 3, 32, 224, 224)  0         
                             ]                                   
                                                                 
 swin_transformer3d (SwinTra  (None, 768, 16, 7, 7)    29694438  
 nsformer3D)                                                     
                                                                 
 i3d_head_tf (I3DHead_tf)    (None, 101)               77669     
                                                                 
Total params: 29,772,107
Trainable params: 27,928,139
Non-trainable params: 1,843,968
_________________________________________________________________


In [27]:
optimizer = tfa.optimizers.AdamW(weight_decay= 0.05,learning_rate=3e-4, beta_1= 0.9, beta_2=0.999, epsilon=1e-8)

metrics=["top_k_categorical_accuracy", "categorical_accuracy"] 
loss_obj = tf.keras.losses.CategoricalCrossentropy(
    # from_logits=True,
    label_smoothing=0.1)

model.compile(loss=loss_obj, optimizer=optimizer, metrics=metrics)



In [34]:
num_epochs = 3

callbacks = [
    tf.keras.callbacks.TensorBoard(),
]

train_steps = num_examples['train'] // batch_size
total_train_steps = train_steps * num_epochs
test_steps = num_examples['test'] // batch_size


# loss_obj=tf.keras.losses.SparseCategoricalCrossentropy()


KeyError: 'train'

In [None]:
results = model.fit(
    train_dataset,
    validation_data=test_dataset,
    epochs=num_epochs,
    steps_per_epoch=train_steps,
    validation_steps=test_steps,
    callbacks=callbacks,
    validation_freq=1,
    verbose=1)

In [29]:
shape_of_input = (8, 3, 32, 224,224)

X = tf.random.normal(shape_of_input)
y = tf.random.uniform((shape_of_input[0],1), 0, 5, tf.dtypes.int32)
# y

In [30]:
model = get_model(5, cfg, shape_of_input=shape_of_input)

model.summary()

5 768 {'type': 'CrossEntropyLoss'} avg 0.5 0.01 {'training': True}
Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 3, 32, 224, 224)  0         
                             ]                                   
                                                                 
 swin_transformer3d (SwinTra  (None, 768, 16, 7, 7)    29694438  
 nsformer3D)                                                     
                                                                 
 i3d_head_tf_1 (I3DHead_tf)  (None, 5)                 3845      
                                                                 
Total params: 29,698,283
Trainable params: 27,854,315
Non-trainable params: 1,843,968
_________________________________________________________________


In [35]:
model.compile(loss=loss_obj, optimizer=optimizer, metrics=metrics)


In [36]:
results = model.fit(
    X,
    y,
    epochs=3,
    steps_per_epoch=5,
    validation_steps=2,
    callbacks=callbacks,
    verbose=1)

Epoch 1/3


ValueError: in user code:

    File "c:\Program Files\Python37\lib\site-packages\keras\engine\training.py", line 1160, in train_function  *
        return step_function(self, iterator)
    File "c:\Program Files\Python37\lib\site-packages\keras\engine\training.py", line 1146, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "c:\Program Files\Python37\lib\site-packages\keras\engine\training.py", line 1135, in run_step  **
        outputs = model.train_step(data)
    File "c:\Program Files\Python37\lib\site-packages\keras\engine\training.py", line 994, in train_step
        loss = self.compute_loss(x, y, y_pred, sample_weight)
    File "c:\Program Files\Python37\lib\site-packages\keras\engine\training.py", line 1053, in compute_loss
        y, y_pred, sample_weight, regularization_losses=self.losses
    File "c:\Program Files\Python37\lib\site-packages\keras\engine\compile_utils.py", line 265, in __call__
        loss_value = loss_obj(y_t, y_p, sample_weight=sw)
    File "c:\Program Files\Python37\lib\site-packages\keras\losses.py", line 152, in __call__
        losses = call_fn(y_true, y_pred)
    File "c:\Program Files\Python37\lib\site-packages\keras\losses.py", line 272, in call  **
        return ag_fn(y_true, y_pred, **self._fn_kwargs)
    File "c:\Program Files\Python37\lib\site-packages\keras\losses.py", line 1991, in categorical_crossentropy
        y_true, y_pred, from_logits=from_logits, axis=axis
    File "c:\Program Files\Python37\lib\site-packages\keras\backend.py", line 5529, in categorical_crossentropy
        target.shape.assert_is_compatible_with(output.shape)

    ValueError: Shapes (2, 1) and (2, 5) are incompatible
