In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import applications as kapp
from tensorflow.keras.applications.resnet50 import preprocess_input
from tensorflow.keras import layers as kl
import matplotlib.pyplot as plt
from tensorflow.keras import utils
import pandas as pd
import numpy as np
import cv2
import os
import imageio
from tensorflow_docs.vis import embed

In [None]:
# Side length (pixels) of the square frames fed to the CNN.
IMG_SIZE = 224
# Input shape of the CNN: height, width, 3 color channels.
INPUT_SIZE = (IMG_SIZE, IMG_SIZE, 3)

# Maximum number of frames sampled from each video.
SEQ_LENGTH = 20
# Built with os.path.join so the path also resolves on non-Windows systems
# (a literal backslash is only a separator on Windows).
DATASET_DIR = os.path.join('data', 'UCF-101')
# Length of the feature vector stored per frame.
NUM_FEATURES = 2048
# Number of training epochs passed to model.fit.
EPOCHS = 128

In [None]:
# source:
# https://www.tensorflow.org/hub/tutorials/action_recognition_with_tf_hub

def crop_center_square(frame):
  """Return the largest centered square region of an image array.

  The first two axes of `frame` are treated as (rows, columns); any
  trailing axes are kept untouched.
  """
  height, width = frame.shape[:2]
  side = min(height, width)
  half_side = side // 2
  top = height // 2 - half_side
  left = width // 2 - half_side
  return frame[top:top + side, left:left + side]

def to_gif(images):
  """Write `images` to ./animation.gif at 25 fps and return its notebook embed.

  Pixel values are multiplied by 255, clipped to [0, 255] and cast to uint8
  before the file is written.
  """
  gif_path = './animation.gif'
  scaled = images * 255
  converted_images = np.clip(scaled, 0, 255).astype(np.uint8)
  imageio.mimsave(gif_path, converted_images, fps=25)
  return embed.embed_file(gif_path)

In [None]:
#frames extraction

def frames_extraction(path, resize=(224, 224)):
    """Sample up to SEQ_LENGTH evenly spaced frames from a video file.

    Each sampled frame is center-cropped to a square, resized, has its
    channel order reversed (OpenCV decodes as BGR, so this yields RGB) and
    is scaled to the [0, 1] range.

    Args:
        path: Path of the video file opened with cv2.VideoCapture.
        resize: Target size passed to cv2.resize.

    Returns:
        A numpy array stacking the processed frames (at most SEQ_LENGTH of
        them). The array is empty when no frame could be read.
    """
    cap = cv2.VideoCapture(path)
    frames = []

    try:
        video_length = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        # Step between sampled frames; at least 1 so short videos are read
        # frame by frame.
        interval = max(video_length // SEQ_LENGTH, 1)

        for i in range(SEQ_LENGTH):
            # CAP_PROP_POS_FRAMES is the seek property. Setting
            # CAP_PROP_FRAME_COUNT does not move the read position, which
            # made every video yield only its first consecutive frames.
            cap.set(cv2.CAP_PROP_POS_FRAMES, i * interval)
            success, frame = cap.read()

            if not success:
                # Ran past the end of the video (or it could not be decoded).
                break

            frame = crop_center_square(frame)
            frame = cv2.resize(frame, resize)
            frame = frame[:, :, [2, 1, 0]]  # reverse channel order (BGR -> RGB)
            frames.append(frame / 255.0)
    finally:
        # Always free the capture handle, even if processing raised.
        cap.release()

    return np.array(frames)

In [None]:
def create_cnn_model():
    """Build the ResNet50 feature extractor used to embed single frames.

    The model takes images of shape INPUT_SIZE with pixel values in [0, 1]
    (the range produced by frames_extraction) and returns the globally
    average-pooled ResNet50 activations, using ImageNet weights and no
    classification head. The model summary is printed as a side effect.

    Returns:
        A keras Model named "ResNet50".
    """
    frame_input = tf.keras.Input(INPUT_SIZE)
    cnn = kapp.ResNet50(pooling="avg", include_top=False, weights='imagenet', input_shape=INPUT_SIZE)

    # ResNet's preprocess_input is documented for pixel values in [0, 255],
    # while frames_extraction divides by 255. Undo that scaling here so the
    # ImageNet mean subtraction is applied at the right magnitude.
    rescaled = kl.Rescaling(255.0)(frame_input)
    preprocessed = preprocess_input(rescaled)

    output = cnn(preprocessed)
    cnn_model = tf.keras.Model(frame_input, output, name="ResNet50")

    cnn_model.summary()

    return cnn_model

In [None]:
# Instantiate the ResNet50-based feature extractor; create_cnn_model() also prints its summary.
cnn_model = create_cnn_model()

In [None]:
# Render the feature extractor's layer graph to a PNG file.
utils.plot_model(
    cnn_model,
    to_file="resnet50_structure_plot.png",
    show_shapes=True,
    show_layer_names=True,
)

In [None]:
#data preparation

# Load the train/test tables; the "tag" column holds the class label.
train_df = pd.read_csv("data/train.csv")
test_df = pd.read_csv("data/test.csv")

# Map class-name strings to integer ids. The vocabulary comes from the
# training tags only and no out-of-vocabulary slot is reserved.
label = keras.layers.StringLookup(
    num_oov_indices=0,
    vocabulary=np.unique(train_df["tag"]),
)
label_vocab = label.get_vocabulary()
print(label_vocab)

In [None]:
def feature_extraction(df, dir):
    """Compute per-frame CNN features, frame masks and labels for every video in `df`.

    Videos are processed in DataFrame row order, so row i of the returned
    features and masks belongs to row i of the returned labels. (Filling the
    arrays class by class only lines up with the labels when the table is
    already sorted by tag in vocabulary order.)

    Args:
        df: DataFrame with a "tag" column (class name) and a "video_name"
            column (file name).
        dir: Dataset root; each video is read from <dir>/<tag>/<video_name>.

    Returns:
        ((frame_features, frame_masks), labels) where
        frame_features is float32 of shape (len(df), SEQ_LENGTH, NUM_FEATURES),
        frame_masks is bool of shape (len(df), SEQ_LENGTH) and is True for
        positions holding a real frame, and labels is the integer class ids
        from the `label` lookup with shape (len(df), 1).
    """
    samples = len(df)
    tags = df["tag"].values
    labels = label(tags[..., None]).numpy()

    frame_masks = np.zeros(shape=(samples, SEQ_LENGTH), dtype="bool")
    frame_features = np.zeros(shape=(samples, SEQ_LENGTH, NUM_FEATURES), dtype="float32")

    video_names = df["video_name"].values
    for row_idx, (class_name, video_name) in enumerate(zip(tags, video_names)):
        video_path = os.path.join(dir, class_name, video_name)
        frames = frames_extraction(video_path)

        length = min(SEQ_LENGTH, frames.shape[0])
        if length == 0:
            # Unreadable or empty video: keep the zero features and all-False mask.
            continue

        # Embed all sampled frames of this video in a single predict call
        # instead of one call per frame.
        frame_features[row_idx, :length] = cnn_model.predict(frames[:length], verbose=0)
        frame_masks[row_idx, :length] = True

    return (frame_features, frame_masks), labels

In [None]:
# Run the CNN over every training and test video; each call returns
# ((frame_features, frame_masks), labels).
train_data, train_labels = feature_extraction(df=train_df, dir=DATASET_DIR)
test_data, test_labels = feature_extraction(df=test_df, dir=DATASET_DIR)

In [None]:
#create model

def create_model():
    """Build the LSTM sequence classifier that consumes per-frame CNN features.

    The model has two inputs, in this order: the frame features of shape
    (SEQ_LENGTH, NUM_FEATURES) and a boolean frame mask of shape
    (SEQ_LENGTH,). The mask is passed to the first LSTM layer. The output is
    a softmax over the classes in `label_vocab`. The model summary is printed
    as a side effect.

    Returns:
        An uncompiled keras Model named "LSTM_Model".
    """
    input_mask = keras.Input((SEQ_LENGTH, ), dtype="bool")
    input_features = keras.Input((SEQ_LENGTH, NUM_FEATURES))
    inputs = [input_features, input_mask]

    x = kl.LSTM(64, return_sequences=True)(input_features, mask=input_mask)
    x = kl.LSTM(32, return_sequences=True)(x)
    x = kl.LSTM(16)(x)
    x = kl.Dropout(0.4)(x)
    output = kl.Dense(len(label_vocab), activation="softmax")(x)

    # `inputs` is already a list; wrapping it again would declare a nested
    # input structure that does not match the flat [features, mask] list the
    # model is fed with.
    model = keras.Model(inputs, output, name="LSTM_Model")

    model.summary()
    return model

In [None]:
# Instantiate the LSTM classifier; create_model() also prints its summary.
model = create_model()

In [None]:
# Render the LSTM classifier's layer graph to a PNG file.
utils.plot_model(
    model,
    to_file="lstm_model_structure_plot.png",
    show_shapes=True,
    show_layer_names=True,
)

In [None]:
# Keep only the best weights seen during training (ModelCheckpoint's default monitor).
filepath = "/tmp/video_classifier/ckpt.weights.h5"
checkpoint = keras.callbacks.ModelCheckpoint(filepath, save_weights_only=True, save_best_only=True, verbose=1)

# Accuracy is better when higher, so improvement must be tracked with
# mode="max"; with "min" a falling accuracy counted as progress and
# restore_best_weights would bring back the worst epoch.
# NOTE(review): this callback is defined here but the fit call in this
# notebook only passes `checkpoint`.
early_stooping_callback = keras.callbacks.EarlyStopping(monitor="accuracy", patience=10, mode="max", restore_best_weights=True)
# Correctly spelled alias; the original (misspelled) name is kept so existing references still work.
early_stopping_callback = early_stooping_callback

# Labels are integer class ids, hence the sparse categorical loss.
model.compile(loss="sparse_categorical_crossentropy", optimizer="adam", metrics=["accuracy"])

In [None]:
# Train on the extracted (features, masks) pair; 30% of the training data is
# held out for validation and the checkpoint callback saves the best weights.
model_training_history = model.fit(
    x=list(train_data),
    y=train_labels,
    epochs=EPOCHS,
    validation_split=0.3,
    callbacks=[checkpoint],
)

In [None]:
#evaluation
# evaluate() returns the loss followed by the compiled metric (accuracy);
# only the accuracy is reported.
_, model_evaluation = model.evaluate(
    [test_data[0], test_data[1]],
    test_labels,
)
print(f"Accuracy: {round(model_evaluation * 100, 2)}%")

In [None]:
#loss & accuracy

def plot_compare(model_history, metric_1, metric_2, title):
    """Draw two recorded training metrics on the current matplotlib figure.

    Args:
        model_history: Object whose `.history` maps metric names to
            per-epoch value sequences (as returned by model.fit).
        metric_1: Name of the first metric; its length defines the x axis.
        metric_2: Name of the second metric.
        title: Title placed above the plot.
    """
    recorded = model_history.history
    # Look up both series first so a missing metric fails before anything is drawn.
    curves = [(name, recorded[name]) for name in (metric_1, metric_2)]
    epochs = range(len(curves[0][1]))

    for name, values in curves:
        plt.plot(epochs, values, label=name)

    plt.grid(True)
    plt.title(title)
    plt.legend()

In [None]:
# Training loss against validation loss, per epoch.
plot_compare(
    model_training_history,
    metric_1="loss",
    metric_2="val_loss",
    title="Total Loss vs Total Validation Loss",
)

In [None]:
# Training accuracy against validation accuracy, per epoch.
plot_compare(
    model_training_history,
    metric_1="accuracy",
    metric_2="val_accuracy",
    title="Total Accuracy vs Total Validation Accuracy",
)