From 4b7286d0266198e46623d4824cfba18fc635b917 Mon Sep 17 00:00:00 2001
From: Sujoy Kumar Goswami <sujoykumargoswami@gmail.com>
Date: Mon, 10 Jan 2022 01:13:33 +0530
Subject: [PATCH 1/3] Add files via upload

Video Classification Model Examination
---
 examples/vision/video_classify.py | 214 ++++++++++++++++++++++++++++++
 1 file changed, 214 insertions(+)
 create mode 100644 examples/vision/video_classify.py

diff --git a/examples/vision/video_classify.py b/examples/vision/video_classify.py
new file mode 100644
index 0000000000..0b2a3a1204
--- /dev/null
+++ b/examples/vision/video_classify.py
@@ -0,0 +1,214 @@
+"""
+Title: Video Classification With TimeDistributed Layer
+Author: [Sujoy K Goswami] (https://www.linkedin.com/in/sujoy-kumar-goswami/)
+Date created: 2022/01/09
+Last modified: 2022/01/09
+Description: Guide to examine quickly any video-classification-model without GPU.
+"""
+
+"""
+## Introduction
+
+Video Classification DL Models are heavy & need huge size of data.
+So it is time-consuming & expensive. Here it is shown, how to examine your model
+quickly, before feeding the actual data, & that also without need of any GPU.
+
+Here video dataset will be created; a white rectangle moving in different directions,
+on a black canvas. The sample code for creating left-moving-rectangle video is below.
+"""
+
+import numpy as np
+import skvideo.io as sk
+
+# creating sample video data
+num_vids = 5
+num_imgs = 50
+img_size = 50
+min_object_size = 1
+max_object_size = 5
+
+for i_vid in range(num_vids):
+    imgs = np.zeros((num_imgs, img_size, img_size))  # set background to 0
+    vid_name = "vid" + str(i_vid) + ".mp4"
+    w, h = np.random.randint(min_object_size, max_object_size, size=2)
+    x = np.random.randint(0, img_size - w)
+    y = np.random.randint(0, img_size - h)
+    i_img = 0
+    while x > 0:
+        imgs[i_img, y : y + h, x : x + w] = 255  # set rectangle as foreground
+        x = x - 1
+        i_img = i_img + 1
+    sk.vwrite(vid_name, imgs.astype(np.uint8))
+
+"""shell
+# play a video
+from IPython.display import Video
+
+Video("vid3.mp4")  # the script & video generated should be in same folder
+"""
+"""
+## Data Generation & Preparation
+
+Now dataset with 4 classes will be created where, a rectangle is moving in 4
+different directions in those classes respectively.
+"""
+
+import numpy as np
+
+# preparing dataset
+X_train = []
+Y_train = []
+labels = {0: "left", 1: "right", 2: "up", 3: "down"}  # 4 classes
+num_vids = 30
+num_imgs = 30
+img_size = 30
+min_object_size = 1
+max_object_size = 5
+# video frames with left moving object
+for i_vid in range(num_vids):
+    imgs = np.zeros((num_imgs, img_size, img_size))  # set background to 0
+    # vid_name = 'vid' + str(i_vid) + '.mp4'
+    w, h = np.random.randint(min_object_size, max_object_size, size=2)
+    x = np.random.randint(0, img_size - w)
+    y = np.random.randint(0, img_size - h)
+    i_img = 0
+    while x > 0:
+        imgs[i_img, y : y + h, x : x + w] = 255  # set rectangle as foreground
+        x = x - 1
+        i_img = i_img + 1
+    X_train.append(imgs)
+for i in range(0, num_imgs):
+    Y_train.append(0)
+# video frames with right moving object
+for i_vid in range(num_vids):
+    imgs = np.zeros((num_imgs, img_size, img_size))  # set background to 0
+    # vid_name = 'vid' + str(i_vid) + '.mp4'
+    w, h = np.random.randint(min_object_size, max_object_size, size=2)
+    x = np.random.randint(0, img_size - w)
+    y = np.random.randint(0, img_size - h)
+    i_img = 0
+    while x < img_size:
+        imgs[i_img, y : y + h, x : x + w] = 255  # set rectangle as foreground
+        x = x + 1
+        i_img = i_img + 1
+    X_train.append(imgs)
+for i in range(0, num_imgs):
+    Y_train.append(1)
+# video frames with up moving object
+for i_vid in range(num_vids):
+    imgs = np.zeros((num_imgs, img_size, img_size))  # set background to 0
+    # vid_name = 'vid' + str(i_vid) + '.mp4'
+    w, h = np.random.randint(min_object_size, max_object_size, size=2)
+    x = np.random.randint(0, img_size - w)
+    y = np.random.randint(0, img_size - h)
+    i_img = 0
+    while y > 0:
+        imgs[i_img, y : y + h, x : x + w] = 255  # set rectangle as foreground
+        y = y - 1
+        i_img = i_img + 1
+    X_train.append(imgs)
+for i in range(0, num_imgs):
+    Y_train.append(2)
+# video frames with down moving object
+for i_vid in range(num_vids):
+    imgs = np.zeros((num_imgs, img_size, img_size))  # set background to 0
+    # vid_name = 'vid' + str(i_vid) + '.mp4'
+    w, h = np.random.randint(min_object_size, max_object_size, size=2)
+    x = np.random.randint(0, img_size - w)
+    y = np.random.randint(0, img_size - h)
+    i_img = 0
+    while y < img_size:
+        imgs[i_img, y : y + h, x : x + w] = 255  # set rectangle as foreground
+        y = y + 1
+        i_img = i_img + 1
+    X_train.append(imgs)
+for i in range(0, num_imgs):
+    Y_train.append(3)
+
+# data pre-processing
+from tensorflow.keras.utils import to_categorical
+
+X_train = np.array(X_train, dtype=np.float32) / 255
+X_train = X_train.reshape(X_train.shape[0], num_imgs, img_size, img_size, 1)
+print(X_train.shape)
+Y_train = np.array(Y_train, dtype=np.uint8)
+Y_train = Y_train.reshape(X_train.shape[0], 1)
+print(Y_train.shape)
+Y_train = to_categorical(Y_train, 4)
+
+"""
+## Model Building & Training
+
+TimeDistributed layer is used to pass temporal information to the network.
+**No GPU is needed.** Training gets completed within few minutes.
+"""
+
+# building model
+from tensorflow.keras.models import Sequential
+from tensorflow.keras.layers import Dense, Conv2D, Flatten, Dropout
+from tensorflow.keras.layers import MaxPooling2D
+from tensorflow.keras.layers import LSTM
+from tensorflow.keras.layers import TimeDistributed
+
+model = Sequential()
+model.add(
+    TimeDistributed(
+        Conv2D(8, (3, 3), strides=(1, 1), activation="relu", padding="same"),
+        input_shape=(num_imgs, img_size, img_size, 1),
+    )
+)
+model.add(
+    TimeDistributed(
+        Conv2D(8, (3, 3), kernel_initializer="he_normal", activation="relu")
+    )
+)
+model.add(TimeDistributed(MaxPooling2D((1, 1), strides=(1, 1))))
+model.add(TimeDistributed(Flatten()))
+model.add(Dropout(0.3))
+model.add(LSTM(64, return_sequences=False, dropout=0.3))
+model.add(Dense(4, activation="softmax"))
+model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])
+model.summary()
+# model training
+model.fit(X_train, Y_train, epochs=30, verbose=1)
+
+"""
+## Model Inferencing
+
+Model is tested on new generated video data
+"""
+
+# model testing with new data (4 videos)
+X_test = []
+Y_test = []
+for i_vid in range(2):
+    imgs = np.zeros((num_imgs, img_size, img_size))  # set background to 0
+    w, h = np.random.randint(min_object_size, max_object_size, size=2)
+    x = np.random.randint(0, img_size - w)
+    y = np.random.randint(0, img_size - h)
+    i_img = 0
+    while x < img_size:
+        imgs[i_img, y : y + h, x : x + w] = 255  # set rectangle as foreground
+        x = x + 1
+        i_img = i_img + 1
+    X_test.append(imgs)  # 2nd class - ‘right'
+for i_vid in range(2):
+    imgs = np.zeros((num_imgs, img_size, img_size))  # set background to 0
+    w, h = np.random.randint(min_object_size, max_object_size, size=2)
+    x = np.random.randint(0, img_size - w)
+    y = np.random.randint(0, img_size - h)
+    i_img = 0
+    while y < img_size:
+        imgs[i_img, y : y + h, x : x + w] = 255  # set rectangle as foreground
+        y = y + 1
+        i_img = i_img + 1
+    X_test.append(imgs)  # 4th class - ‘down'
+X_test = np.array(X_test, dtype=np.float32) / 255
+X_test = X_test.reshape(X_test.shape[0], num_imgs, img_size, img_size, 1)
+pred = np.argmax(model.predict(X_test), axis=-1)
+for i in range(len(X_test)):
+    print(labels[pred[i]])
+
+"""
+Clearly, the model is performing well on this dataset.
+"""

From 4506d3d32ad4b55245736476994ce3079b954c75 Mon Sep 17 00:00:00 2001
From: Sujoy Kumar Goswami <sujoykumargoswami@gmail.com>
Date: Mon, 10 Jan 2022 05:31:21 +0530
Subject: [PATCH 2/3] Delete video_classify.py

---
 examples/vision/video_classify.py | 214 ------------------------------
 1 file changed, 214 deletions(-)
 delete mode 100644 examples/vision/video_classify.py

diff --git a/examples/vision/video_classify.py b/examples/vision/video_classify.py
deleted file mode 100644
index 0b2a3a1204..0000000000
--- a/examples/vision/video_classify.py
+++ /dev/null
@@ -1,214 +0,0 @@
-"""
-Title: Video Classification With TimeDistributed Layer
-Author: [Sujoy K Goswami] (https://www.linkedin.com/in/sujoy-kumar-goswami/)
-Date created: 2022/01/09
-Last modified: 2022/01/09
-Description: Guide to examine quickly any video-classification-model without GPU.
-"""
-
-"""
-## Introduction
-
-Video Classification DL Models are heavy & need huge size of data.
-So it is time-consuming & expensive. Here it is shown, how to examine your model
-quickly, before feeding the actual data, & that also without need of any GPU.
-
-Here video dataset will be created; a white rectangle moving in different directions,
-on a black canvas. The sample code for creating left-moving-rectangle video is below.
-"""
-
-import numpy as np
-import skvideo.io as sk
-
-# creating sample video data
-num_vids = 5
-num_imgs = 50
-img_size = 50
-min_object_size = 1
-max_object_size = 5
-
-for i_vid in range(num_vids):
-    imgs = np.zeros((num_imgs, img_size, img_size))  # set background to 0
-    vid_name = "vid" + str(i_vid) + ".mp4"
-    w, h = np.random.randint(min_object_size, max_object_size, size=2)
-    x = np.random.randint(0, img_size - w)
-    y = np.random.randint(0, img_size - h)
-    i_img = 0
-    while x > 0:
-        imgs[i_img, y : y + h, x : x + w] = 255  # set rectangle as foreground
-        x = x - 1
-        i_img = i_img + 1
-    sk.vwrite(vid_name, imgs.astype(np.uint8))
-
-"""shell
-# play a video
-from IPython.display import Video
-
-Video("vid3.mp4")  # the script & video generated should be in same folder
-"""
-"""
-## Data Generation & Preparation
-
-Now dataset with 4 classes will be created where, a rectangle is moving in 4
-different directions in those classes respectively.
-"""
-
-import numpy as np
-
-# preparing dataset
-X_train = []
-Y_train = []
-labels = {0: "left", 1: "right", 2: "up", 3: "down"}  # 4 classes
-num_vids = 30
-num_imgs = 30
-img_size = 30
-min_object_size = 1
-max_object_size = 5
-# video frames with left moving object
-for i_vid in range(num_vids):
-    imgs = np.zeros((num_imgs, img_size, img_size))  # set background to 0
-    # vid_name = 'vid' + str(i_vid) + '.mp4'
-    w, h = np.random.randint(min_object_size, max_object_size, size=2)
-    x = np.random.randint(0, img_size - w)
-    y = np.random.randint(0, img_size - h)
-    i_img = 0
-    while x > 0:
-        imgs[i_img, y : y + h, x : x + w] = 255  # set rectangle as foreground
-        x = x - 1
-        i_img = i_img + 1
-    X_train.append(imgs)
-for i in range(0, num_imgs):
-    Y_train.append(0)
-# video frames with right moving object
-for i_vid in range(num_vids):
-    imgs = np.zeros((num_imgs, img_size, img_size))  # set background to 0
-    # vid_name = 'vid' + str(i_vid) + '.mp4'
-    w, h = np.random.randint(min_object_size, max_object_size, size=2)
-    x = np.random.randint(0, img_size - w)
-    y = np.random.randint(0, img_size - h)
-    i_img = 0
-    while x < img_size:
-        imgs[i_img, y : y + h, x : x + w] = 255  # set rectangle as foreground
-        x = x + 1
-        i_img = i_img + 1
-    X_train.append(imgs)
-for i in range(0, num_imgs):
-    Y_train.append(1)
-# video frames with up moving object
-for i_vid in range(num_vids):
-    imgs = np.zeros((num_imgs, img_size, img_size))  # set background to 0
-    # vid_name = 'vid' + str(i_vid) + '.mp4'
-    w, h = np.random.randint(min_object_size, max_object_size, size=2)
-    x = np.random.randint(0, img_size - w)
-    y = np.random.randint(0, img_size - h)
-    i_img = 0
-    while y > 0:
-        imgs[i_img, y : y + h, x : x + w] = 255  # set rectangle as foreground
-        y = y - 1
-        i_img = i_img + 1
-    X_train.append(imgs)
-for i in range(0, num_imgs):
-    Y_train.append(2)
-# video frames with down moving object
-for i_vid in range(num_vids):
-    imgs = np.zeros((num_imgs, img_size, img_size))  # set background to 0
-    # vid_name = 'vid' + str(i_vid) + '.mp4'
-    w, h = np.random.randint(min_object_size, max_object_size, size=2)
-    x = np.random.randint(0, img_size - w)
-    y = np.random.randint(0, img_size - h)
-    i_img = 0
-    while y < img_size:
-        imgs[i_img, y : y + h, x : x + w] = 255  # set rectangle as foreground
-        y = y + 1
-        i_img = i_img + 1
-    X_train.append(imgs)
-for i in range(0, num_imgs):
-    Y_train.append(3)
-
-# data pre-processing
-from tensorflow.keras.utils import to_categorical
-
-X_train = np.array(X_train, dtype=np.float32) / 255
-X_train = X_train.reshape(X_train.shape[0], num_imgs, img_size, img_size, 1)
-print(X_train.shape)
-Y_train = np.array(Y_train, dtype=np.uint8)
-Y_train = Y_train.reshape(X_train.shape[0], 1)
-print(Y_train.shape)
-Y_train = to_categorical(Y_train, 4)
-
-"""
-## Model Building & Training
-
-TimeDistributed layer is used to pass temporal information to the network.
-**No GPU is needed.** Training gets completed within few minutes.
-"""
-
-# building model
-from tensorflow.keras.models import Sequential
-from tensorflow.keras.layers import Dense, Conv2D, Flatten, Dropout
-from tensorflow.keras.layers import MaxPooling2D
-from tensorflow.keras.layers import LSTM
-from tensorflow.keras.layers import TimeDistributed
-
-model = Sequential()
-model.add(
-    TimeDistributed(
-        Conv2D(8, (3, 3), strides=(1, 1), activation="relu", padding="same"),
-        input_shape=(num_imgs, img_size, img_size, 1),
-    )
-)
-model.add(
-    TimeDistributed(
-        Conv2D(8, (3, 3), kernel_initializer="he_normal", activation="relu")
-    )
-)
-model.add(TimeDistributed(MaxPooling2D((1, 1), strides=(1, 1))))
-model.add(TimeDistributed(Flatten()))
-model.add(Dropout(0.3))
-model.add(LSTM(64, return_sequences=False, dropout=0.3))
-model.add(Dense(4, activation="softmax"))
-model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])
-model.summary()
-# model training
-model.fit(X_train, Y_train, epochs=30, verbose=1)
-
-"""
-## Model Inferencing
-
-Model is tested on new generated video data
-"""
-
-# model testing with new data (4 videos)
-X_test = []
-Y_test = []
-for i_vid in range(2):
-    imgs = np.zeros((num_imgs, img_size, img_size))  # set background to 0
-    w, h = np.random.randint(min_object_size, max_object_size, size=2)
-    x = np.random.randint(0, img_size - w)
-    y = np.random.randint(0, img_size - h)
-    i_img = 0
-    while x < img_size:
-        imgs[i_img, y : y + h, x : x + w] = 255  # set rectangle as foreground
-        x = x + 1
-        i_img = i_img + 1
-    X_test.append(imgs)  # 2nd class - ‘right'
-for i_vid in range(2):
-    imgs = np.zeros((num_imgs, img_size, img_size))  # set background to 0
-    w, h = np.random.randint(min_object_size, max_object_size, size=2)
-    x = np.random.randint(0, img_size - w)
-    y = np.random.randint(0, img_size - h)
-    i_img = 0
-    while y < img_size:
-        imgs[i_img, y : y + h, x : x + w] = 255  # set rectangle as foreground
-        y = y + 1
-        i_img = i_img + 1
-    X_test.append(imgs)  # 4th class - ‘down'
-X_test = np.array(X_test, dtype=np.float32) / 255
-X_test = X_test.reshape(X_test.shape[0], num_imgs, img_size, img_size, 1)
-pred = np.argmax(model.predict(X_test), axis=-1)
-for i in range(len(X_test)):
-    print(labels[pred[i]])
-
-"""
-Clearly, the model is performing well on this dataset.
-"""

From ca430025f0563907c7d6e57d66acde08c467ecac Mon Sep 17 00:00:00 2001
From: Sujoy Kumar Goswami <sujoykumargoswami@gmail.com>
Date: Mon, 10 Jan 2022 05:45:38 +0530
Subject: [PATCH 3/3] Add files via upload

Video Classification Model Examination
---
 examples/vision/video_classify.py | 211 ++++++++++++++++++++++++++++++
 1 file changed, 211 insertions(+)
 create mode 100644 examples/vision/video_classify.py

diff --git a/examples/vision/video_classify.py b/examples/vision/video_classify.py
new file mode 100644
index 0000000000..c02e17a5fb
--- /dev/null
+++ b/examples/vision/video_classify.py
@@ -0,0 +1,211 @@
+"""
+Title: Video Classification With TimeDistributed Layer
+Author: [Sujoy K Goswami](https://www.linkedin.com/in/sujoy-kumar-goswami/)
+Date created: 2022/01/09
+Last modified: 2022/01/10
+Description: Guide to examine any video-classification-model quickly without any GPU.
+"""
+
+"""
+## Introduction
+
+Video Classification DL Models are heavy and need huge size of data.
+So it is time-consuming and expensive. Here it is shown, how to examine your model
+quickly, before feeding the actual data, and that also without need of any GPU.
+
+Here video dataset will be created; a white rectangle moving in different directions,
+on a black canvas. The sample code for creating left-moving-rectangle videos is below.
+"""
+
+import numpy as np
+import skvideo.io as sk
+from IPython.display import Video
+
+# creating sample video data
+num_vids = 5
+num_imgs = 50
+img_size = 50
+min_object_size = 1
+max_object_size = 5
+
+for i_vid in range(num_vids):
+    imgs = np.zeros((num_imgs, img_size, img_size))  # set background to 0
+    vid_name = "vid" + str(i_vid) + ".mp4"
+    w, h = np.random.randint(min_object_size, max_object_size, size=2)
+    x = np.random.randint(0, img_size - w)
+    y = np.random.randint(0, img_size - h)
+    i_img = 0
+    while x > 0:
+        imgs[i_img, y : y + h, x : x + w] = 255  # set rectangle as foreground
+        x = x - 1
+        i_img = i_img + 1
+    sk.vwrite(vid_name, imgs.astype(np.uint8))
+Video(
+    "vid3.mp4"
+)  # play a video; the script and video generated should be in same folder
+
+"""
+## Data Generation and Preparation
+
+Now dataset with 4 classes will be created where, a rectangle is moving in 4
+different directions in those classes respectively.
+"""
+
+
+# preparing dataset
+X_train = []
+Y_train = []
+labels = {0: "left", 1: "right", 2: "up", 3: "down"}  # 4 classes
+num_vids = 40
+num_imgs = 40
+img_size = 40
+min_object_size = 1
+max_object_size = 5
+# video frames with left moving object
+for i_vid in range(num_vids):
+    imgs = np.zeros((num_imgs, img_size, img_size))  # set background to 0
+    # vid_name = 'vid' + str(i_vid) + '.mp4'
+    w, h = np.random.randint(min_object_size, max_object_size, size=2)
+    x = np.random.randint(0, img_size - w)
+    y = np.random.randint(0, img_size - h)
+    i_img = 0
+    while x > 0:
+        imgs[i_img, y : y + h, x : x + w] = 255  # set rectangle as foreground
+        x = x - 1
+        i_img = i_img + 1
+    X_train.append(imgs)
+for i in range(0, num_imgs):
+    Y_train.append(0)
+# video frames with right moving object
+for i_vid in range(num_vids):
+    imgs = np.zeros((num_imgs, img_size, img_size))  # set background to 0
+    # vid_name = 'vid' + str(i_vid) + '.mp4'
+    w, h = np.random.randint(min_object_size, max_object_size, size=2)
+    x = np.random.randint(0, img_size - w)
+    y = np.random.randint(0, img_size - h)
+    i_img = 0
+    while x < img_size:
+        imgs[i_img, y : y + h, x : x + w] = 255  # set rectangle as foreground
+        x = x + 1
+        i_img = i_img + 1
+    X_train.append(imgs)
+for i in range(0, num_imgs):
+    Y_train.append(1)
+# video frames with up moving object
+for i_vid in range(num_vids):
+    imgs = np.zeros((num_imgs, img_size, img_size))  # set background to 0
+    # vid_name = 'vid' + str(i_vid) + '.mp4'
+    w, h = np.random.randint(min_object_size, max_object_size, size=2)
+    x = np.random.randint(0, img_size - w)
+    y = np.random.randint(0, img_size - h)
+    i_img = 0
+    while y > 0:
+        imgs[i_img, y : y + h, x : x + w] = 255  # set rectangle as foreground
+        y = y - 1
+        i_img = i_img + 1
+    X_train.append(imgs)
+for i in range(0, num_imgs):
+    Y_train.append(2)
+# video frames with down moving object
+for i_vid in range(num_vids):
+    imgs = np.zeros((num_imgs, img_size, img_size))  # set background to 0
+    # vid_name = 'vid' + str(i_vid) + '.mp4'
+    w, h = np.random.randint(min_object_size, max_object_size, size=2)
+    x = np.random.randint(0, img_size - w)
+    y = np.random.randint(0, img_size - h)
+    i_img = 0
+    while y < img_size:
+        imgs[i_img, y : y + h, x : x + w] = 255  # set rectangle as foreground
+        y = y + 1
+        i_img = i_img + 1
+    X_train.append(imgs)
+for i in range(0, num_imgs):
+    Y_train.append(3)
+
+# data pre-processing
+from tensorflow.keras.utils import to_categorical
+
+X_train = np.array(X_train, dtype=np.float32) / 255
+X_train = X_train.reshape(X_train.shape[0], num_imgs, img_size, img_size, 1)
+print(X_train.shape)
+Y_train = np.array(Y_train, dtype=np.uint8)
+Y_train = Y_train.reshape(X_train.shape[0], 1)
+print(Y_train.shape)
+Y_train = to_categorical(Y_train, 4)
+
+"""
+## Model Building and Training
+
+TimeDistributed layer is used to pass temporal information of videos to the network.
+**No GPU is needed.** Training gets completed within few minutes.
+"""
+
+# building model
+from tensorflow.keras.models import Sequential
+from tensorflow.keras.layers import Dense, Conv2D, Flatten, Dropout
+from tensorflow.keras.layers import MaxPooling2D
+from tensorflow.keras.layers import LSTM
+from tensorflow.keras.layers import TimeDistributed
+
+model = Sequential()
+model.add(
+    TimeDistributed(
+        Conv2D(8, (3, 3), strides=(1, 1), activation="relu", padding="same"),
+        input_shape=(num_imgs, img_size, img_size, 1),
+    )
+)
+model.add(
+    TimeDistributed(
+        Conv2D(8, (3, 3), kernel_initializer="he_normal", activation="relu")
+    )
+)
+model.add(TimeDistributed(MaxPooling2D((1, 1), strides=(1, 1))))
+model.add(TimeDistributed(Flatten()))
+model.add(Dropout(0.3))
+model.add(LSTM(64, return_sequences=False, dropout=0.3))
+model.add(Dense(4, activation="softmax"))
+model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])
+model.summary()
+# model training
+model.fit(X_train, Y_train, epochs=40, verbose=1)
+
+"""
+## Model Inferencing
+
+Model is tested on new generated video data.
+"""
+
+# model testing with new data (4 videos)
+X_test = []
+Y_test = []
+for i_vid in range(2):
+    imgs = np.zeros((num_imgs, img_size, img_size))  # set background to 0
+    w, h = np.random.randint(min_object_size, max_object_size, size=2)
+    x = np.random.randint(0, img_size - w)
+    y = np.random.randint(0, img_size - h)
+    i_img = 0
+    while x < img_size:
+        imgs[i_img, y : y + h, x : x + w] = 255  # set rectangle as foreground
+        x = x + 1
+        i_img = i_img + 1
+    X_test.append(imgs)  # 2nd class - 'right'
+for i_vid in range(2):
+    imgs = np.zeros((num_imgs, img_size, img_size))  # set background to 0
+    w, h = np.random.randint(min_object_size, max_object_size, size=2)
+    x = np.random.randint(0, img_size - w)
+    y = np.random.randint(0, img_size - h)
+    i_img = 0
+    while y < img_size:
+        imgs[i_img, y : y + h, x : x + w] = 255  # set rectangle as foreground
+        y = y + 1
+        i_img = i_img + 1
+    X_test.append(imgs)  # 4th class - 'down'
+X_test = np.array(X_test, dtype=np.float32) / 255
+X_test = X_test.reshape(X_test.shape[0], num_imgs, img_size, img_size, 1)
+pred = np.argmax(model.predict(X_test), axis=-1)
+for i in range(len(X_test)):
+    print(labels[pred[i]])
+
+"""
+Clearly, the model is examined well on this synthetic dataset.
+"""