In [None]:
!pip install tf-models-official --quiet
!pip install remotezip tqdm opencv-python einops --quiet

In [None]:
import tqdm
import random
import pathlib
import itertools
import collections

import cv2
import einops
import pickle
import numpy as np
import remotezip as rz
import seaborn as sns
import matplotlib.pyplot as plt
import json
import keras
import tensorflow as tf
import tensorflow_hub as hub
from keras import layers
from keras.optimizers import Adam
from keras.losses import SparseCategoricalCrossentropy

In [None]:
def list_files_per_class(zip_url):
  """List the names of every entry stored in a remote zip archive.

  Args:
    zip_url: URL of the zip archive to inspect.

  Returns:
    A list with the `filename` of each entry reported by the archive's
    `infolist()`, in archive order.
  """
  # Open the archive the caller asked for. The previous version ignored
  # `zip_url` and read a module-level `URL` that is only defined in a later cell.
  with rz.RemoteZip(zip_url) as archive:
    return [info.filename for info in archive.infolist()]

def get_class(fname):
  """Return the class name encoded in a video file name.

  The name is split on underscores and the third token from the end is
  returned, e.g. `v_ApplyEyeMakeup_g01_c01.avi` -> `ApplyEyeMakeup`.
  Raises IndexError when the name has fewer than three tokens.
  """
  tokens = fname.split('_')
  return tokens[-3]

def get_files_per_class(files):
  """Group file names by the class returned by `get_class`.

  Args:
    files: Iterable of file names.

  Returns:
    A `collections.defaultdict(list)` mapping class name -> file names, with
    files kept in the order they were given.
  """
  grouped = collections.defaultdict(list)
  for path in files:
    grouped[get_class(path)].append(path)
  return grouped

def download_from_zip(zip_url, to_dir, file_names):
  """Extract selected members of a remote zip into per-class folders.

  Args:
    zip_url: URL of the zip archive.
    to_dir: `pathlib.Path` of the destination root directory.
    file_names: Archive member names to extract.

  Each member is extracted under `to_dir / <class name>` and then moved so
  that it sits directly in that class folder under its base name.
  """
  with rz.RemoteZip(zip_url) as archive:
    for member in tqdm.tqdm(file_names):
      class_dir = to_dir / get_class(member)
      archive.extract(member, str(class_dir))

      # The extracted file keeps the member's internal path; flatten it.
      extracted = class_dir / member
      flattened = class_dir / pathlib.Path(member).parts[-1]
      extracted.rename(flattened)

def split_class_lists(files_for_class, count):
  """Take the first `count` files of every class.

  Args:
    files_for_class: Mapping of class name -> list of file names.
    count: Number of files to take from the front of each class list.

  Returns:
    A tuple `(taken, leftover)`: `taken` is one flat list with the selected
    files of all classes, `leftover` maps each class to its remaining files.
  """
  taken = []
  leftover = {}
  for cls, cls_files in files_for_class.items():
    taken += cls_files[:count]
    leftover[cls] = cls_files[count:]
  return taken, leftover

def download_ufc_101_subset(zip_url, num_classes, splits, download_dir):
  """Download a class-balanced subset of a remote video archive, split by name.

  Args:
    zip_url: URL of the zip archive holding the videos.
    num_classes: Number of classes to keep (the first ones found in the archive).
    splits: Mapping of split name -> number of files to take per class.
    download_dir: `pathlib.Path` under which one folder per split is created.

  Returns:
    A dict mapping each split name to the directory its files were written to.
  """
  # Keep only entries that actually name a file; directory entries end with '/'.
  # The previous loop called files.remove() while iterating over `files`, which
  # skips the element after every removal, and its `len(tokens) <= 2` test also
  # matched ordinary "dir/file" entries, so roughly every other video was lost.
  files = [f for f in list_files_per_class(zip_url) if f.split('/')[-1]]

  files_for_class = get_files_per_class(files)

  # Only use the number of classes you want in the dictionary.
  selected_classes = list(files_for_class)[:num_classes]
  files_for_class = {cls: files_for_class[cls] for cls in selected_classes}

  # Shuffle in place so each split gets a random sample of every class.
  for class_files in files_for_class.values():
    random.shuffle(class_files)

  dirs = {}
  for split_name, split_count in splits.items():
    print(split_name, ":")
    split_dir = download_dir / split_name
    # Each split consumes files from the front; the remainder feeds the next split.
    split_files, files_for_class = split_class_lists(files_for_class, split_count)
    download_from_zip(zip_url, split_dir, split_files)
    dirs[split_name] = split_dir

  return dirs

def format_frames(frame, output_size):
  """Convert a frame to float32 and resize it with padding.

  Args:
    frame: Image to convert, as accepted by `tf.image.convert_image_dtype`.
    output_size: Sequence unpacked into the size arguments of
      `tf.image.resize_with_pad` (target height, target width).

  Returns:
    The converted, padded and resized frame.
  """
  as_float = tf.image.convert_image_dtype(frame, tf.float32)
  return tf.image.resize_with_pad(as_float, *output_size)

def frames_from_video_file(video_path, n_frames, output_size = (224,224), frame_step = 15):
  """Sample `n_frames` frames, `frame_step` apart, from a video file.

  Args:
    video_path: Path of the video file to read.
    n_frames: Number of frames to return.
    output_size: Size passed to `format_frames` for every frame.
    frame_step: Number of frames to advance between two sampled frames.

  Returns:
    A NumPy array of the formatted frames stacked along the first axis, with
    the last (channel) axis reversed (OpenCV's BGR order -> RGB).

  Raises:
    ValueError: If not even the first frame can be read from `video_path`.
  """
  result = []
  src = cv2.VideoCapture(str(video_path))

  # try/finally so the capture is released even when reading or formatting fails.
  try:
    # The frame count is cast to int because random.randint requires integer bounds.
    video_length = int(src.get(cv2.CAP_PROP_FRAME_COUNT))

    need_length = 1 + (n_frames - 1) * frame_step

    if need_length > video_length:
      start = 0
    else:
      max_start = video_length - need_length
      # random.randint includes both endpoints, so max_start itself is the last
      # valid start; `max_start + 1` would push the final sample past the end.
      start = random.randint(0, max_start)
      src.set(cv2.CAP_PROP_POS_FRAMES, start)

    # ret is a boolean indicating whether read was successful, frame is the image itself
    ret, frame = src.read()
    if not ret:
      raise ValueError(f"Could not read any frame from video file: {video_path}")
    result.append(format_frames(frame, output_size))

    for _ in range(n_frames - 1):
      # Advance frame_step frames; only the last one read is kept.
      for _ in range(frame_step):
        ret, frame = src.read()
      if ret:
        result.append(format_frames(frame, output_size))
      else:
        # The video ended early: pad with an all-zero frame of the same shape.
        result.append(np.zeros_like(result[0]))
  finally:
    src.release()

  # Reverse the channel axis: OpenCV decodes frames as BGR.
  return np.array(result)[..., [2, 1, 0]]

In [None]:
import os

class FrameGenerator:
    """Callable that yields `(frames, label)` pairs for the videos under a directory.

    `path` is expected to contain one sub-directory per class; the `.mp4` files
    inside the first `n_classes` class directories (in sorted order) are used.
    """

    def __init__(self, path, n_frames, n_classes, training = False):
        """Store the settings and build the class-name -> integer id mapping.

        Args:
          path: Root directory holding one sub-directory per class.
          n_frames: Number of frames to sample from each video.
          n_classes: Number of classes (sub-directories) to use.
          training: If True, the videos are shuffled on every call.
        """
        self.path = path
        self.n_frames = n_frames
        self.n_classes = n_classes
        self.training = training
        # os.listdir returns entries in arbitrary order and also lists plain
        # files. Sorting and keeping directories only makes the label ids
        # deterministic, and identical for two roots with the same class folders.
        self.class_names = sorted(
            name for name in os.listdir(path)
            if os.path.isdir(os.path.join(path, name)))
        self.class_ids_for_name = {name: idx for idx, name in enumerate(self.class_names)}

    def get_all_avi(self, root_dir, classes):
        """Collect the `.mp4` files of the given class folders.

        The method name is historical; the files matched are `.mp4`, not AVI.

        Args:
          root_dir: Directory containing the class folders.
          classes: Names of the class folders to scan.

        Returns:
          A tuple `(paths, class_names)` of two parallel lists: the path of
          each video and the name of the class folder it was found in.
        """
        video_paths = []
        video_classes = []
        for folder_name in classes:
            folder_path = os.path.join(root_dir, folder_name)

            # Skip names that are not directories.
            if not os.path.isdir(folder_path):
                continue

            # Sorted so the (unshuffled) iteration order is reproducible.
            for file_name in sorted(os.listdir(folder_path)):
                if file_name.endswith('.mp4'):
                    video_paths.append(os.path.join(folder_path, file_name))
                    video_classes.append(folder_name)
        return video_paths, video_classes

    def get_files_and_class_names(self):
        """Return the video paths and class names for the first `n_classes` classes."""
        classes = self.class_names[:self.n_classes]
        return self.get_all_avi(self.path, classes)

    def __call__(self):
        """Yield `(video_frames, label)` for every selected video.

        `video_frames` comes from `frames_from_video_file` and `label` is the
        integer id of the video's class.
        """
        video_paths, classes = self.get_files_and_class_names()

        pairs = list(zip(video_paths, classes))

        if self.training:
            random.shuffle(pairs)

        for path, name in pairs:
            video_frames = frames_from_video_file(path, self.n_frames)
            label = self.class_ids_for_name[name] # Encode labels
            yield video_frames, label

In [None]:
# Remote archive with the UCF101 videos, and the local folder a subset is extracted into.
URL = 'https://storage.googleapis.com/thumos14_files/UCF101_videos.zip'
download_dir = pathlib.Path('./UCF101_subset/')

# Element signature for tf.data.Dataset.from_generator: a float32 tensor with
# three unknown leading dimensions and 3 channels, and a scalar int16 label.
output_signature = (
    tf.TensorSpec(shape=(None, None, None, 3), dtype=tf.float32),
    tf.TensorSpec(shape=(), dtype=tf.int16),
)

In [None]:
# Height and width, in pixels, of one frame in the set of frames created.
HEIGHT, WIDTH = 224, 224

In [None]:
class Conv2Plus1D(keras.layers.Layer):
  """(2+1)D convolution: a spatial Conv3D followed by a temporal Conv3D.

  The first convolution uses a `(1, k_h, k_w)` kernel, so it only mixes the
  spatial dimensions; the second uses a `(k_t, 1, 1)` kernel, so it only mixes
  the temporal dimension. Both use the same number of filters and padding.
  """

  def __init__(self, filters, kernel_size, padding):
    """
      Args:
        filters: Number of output filters of both convolutions.
        kernel_size: Sequence indexed as (time, height, width).
        padding: Padding mode passed to both Conv3D layers.
    """
    super().__init__()
    spatial_kernel = (1, kernel_size[1], kernel_size[2])
    temporal_kernel = (kernel_size[0], 1, 1)

    # Spatial decomposition
    spatial_conv = layers.Conv3D(filters=filters,
                                 kernel_size=spatial_kernel,
                                 padding=padding)
    # Temporal decomposition
    temporal_conv = layers.Conv3D(filters=filters,
                                  kernel_size=temporal_kernel,
                                  padding=padding)
    self.seq = keras.Sequential([spatial_conv, temporal_conv])

  def call(self, x):
    """Apply the spatial convolution, then the temporal one, to `x`."""
    return self.seq(x)

In [None]:
class ResidualMain(keras.layers.Layer):
    """Main branch of a residual block.

    Runs Conv2Plus1D -> LayerNormalization -> ReLU -> Conv2Plus1D ->
    LayerNormalization, with 'same' padding on both convolutions.
    """

    def __init__(self, filters, kernel_size):
        """
        Args:
          filters: Number of filters of both Conv2Plus1D layers.
          kernel_size: Kernel size passed to both Conv2Plus1D layers.
        """
        super().__init__()
        first_conv = Conv2Plus1D(filters=filters,
                                 kernel_size=kernel_size,
                                 padding='same')
        first_norm = layers.LayerNormalization()
        activation = layers.ReLU()
        second_conv = Conv2Plus1D(filters=filters,
                                  kernel_size=kernel_size,
                                  padding='same')
        second_norm = layers.LayerNormalization()
        self.seq = keras.Sequential(
            [first_conv, first_norm, activation, second_conv, second_norm])

    def call(self, x):
        """Run `x` through the convolution / normalization stack."""
        return self.seq(x)

In [None]:
class Project(keras.layers.Layer):
    """Project the last dimension of a tensor to `units` and layer-normalize it.

    Used on the shortcut branch of a residual block when the number of
    channels changes.
    """

    def __init__(self, units):
        """
        Args:
          units: Size of the last dimension after the projection.
        """
        super().__init__()
        projection = layers.Dense(units)
        normalization = layers.LayerNormalization()
        self.seq = keras.Sequential([projection, normalization])

    def call(self, x):
        """Apply the dense projection followed by layer normalization."""
        return self.seq(x)

In [None]:
def add_residual_block(input, filters, kernel_size):
    """Append a residual block to `input` and return the block's output.

    The main branch is `ResidualMain(filters, kernel_size)`. The shortcut is
    `input` itself, or `Project(...)` of it when the last dimension of the
    main branch differs from the last dimension of `input`. The two branches
    are summed with `layers.add`.
    """
    main_branch = ResidualMain(filters, kernel_size)(input)
    out_channels = main_branch.shape[-1]

    # Using the Keras functional APIs, project the last dimension of the tensor
    # to match the new filter size when it changed.
    if out_channels == input.shape[-1]:
        shortcut = input
    else:
        shortcut = Project(out_channels)(input)

    return layers.add([shortcut, main_branch])

In [None]:
class ResizeVideo(keras.layers.Layer):
    """Resize every frame of a batch of videos to a fixed height and width."""

    def __init__(self, height, width):
        """
        Args:
          height: Height of the frames after resizing.
          width: Width of the frames after resizing.
        """
        super().__init__()
        self.height = height
        self.width = width
        self.resizing_layer = layers.Resizing(self.height, self.width)

    def call(self, video):
        """
        Use the einops library to resize the tensor.

        Args:
          video: Tensor representation of the video, in the form of a set of frames.

        Return:
          The video with every frame resized to the new height and width.
        """
        # b stands for batch size, t stands for time, h stands for height,
        # w stands for width, and c stands for the number of channels.
        n_frames = einops.parse_shape(video, 'b t h w c')['t']

        # Fold time into the batch axis so the image resizing layer can be applied.
        frames = einops.rearrange(video, 'b t h w c -> (b t) h w c')
        resized = self.resizing_layer(frames)

        # Unfold the batch axis back into (batch, time).
        return einops.rearrange(resized, '(b t) h w c -> b t h w c', t = n_frames)

In [None]:
import json

# Numbers of classes to train a model for: 30, 35, 40 and 45.
num_classes_list = list(range(30, 46, 5))

# Number of frames sampled from each video.
n_frames = 10

# Batch size.
batch_size = 8

# Dataset locations.
subset_paths_train = "/kaggle/input/kinetics-train-5per/kinetics600_5per/kinetics600_5per/train"
subset_paths_val = "/kaggle/input/kinetics-train-5per/kinetics400_5per/kinetics400_5per/train"

# Divisor applied to the spatial size used at every ResizeVideo stage.
coeff = 1


def build_video_classifier(n_frames, num_classes, height, width, coeff=1):
    """Build the (2+1)D residual video classifier.

    Args:
      n_frames: Number of frames in each input clip.
      num_classes: Number of output logits.
      height: Height of the input frames.
      width: Width of the input frames.
      coeff: Extra divisor applied to the spatial size at every resize stage.

    Returns:
      An uncompiled `keras.Model` mapping clips of shape
      `(n_frames, height, width, 3)` to `num_classes` logits.
    """
    inputs = layers.Input(shape=(n_frames, height, width, 3))

    # Stem
    x = Conv2Plus1D(filters=16, kernel_size=(3, 7, 7), padding='same')(inputs)
    x = layers.BatchNormalization()(x)
    x = layers.ReLU()(x)
    x = ResizeVideo(height // (coeff*2), width // (coeff*2))(x)

    # Block 1
    x = add_residual_block(x, 16, (3, 3, 3))
    x = ResizeVideo(height // (coeff*4), width // (coeff*4))(x)

    # Block 2
    x = add_residual_block(x, 32, (3, 3, 3))
    x = ResizeVideo(height // (coeff*8), width // (coeff*8))(x)

    # Block 3
    x = add_residual_block(x, 64, (3, 3, 3))
    x = ResizeVideo(height // (coeff*16), width // (coeff*16))(x)

    # Block 4
    x = add_residual_block(x, 128, (3, 3, 3))

    # Classification head; the Dense layer outputs logits (no softmax).
    x = layers.GlobalAveragePooling3D()(x)
    x = layers.Flatten()(x)
    outputs = layers.Dense(num_classes)(x)

    return keras.Model(inputs, outputs)


# Train one model per class count and save each training history to its own file.
for num_classes in num_classes_list:
    print(f"-------------------- {num_classes} classes --------------------")

    # Create the training and validation datasets.
    train_ds = tf.data.Dataset.from_generator(
        FrameGenerator(subset_paths_train, n_frames, num_classes, training=True),
        output_signature = output_signature)
    train_ds = train_ds.batch(batch_size)

    val_ds = tf.data.Dataset.from_generator(
        FrameGenerator(subset_paths_val, n_frames, num_classes),
        output_signature = output_signature)
    val_ds = val_ds.batch(batch_size)

    # Build and compile the model. The input shape follows n_frames instead of
    # a hard-coded 10. No model.build() call is made: it expects a shape, not a
    # batch of frames, and the model already has a defined input.
    model = build_video_classifier(n_frames, num_classes, HEIGHT, WIDTH, coeff)
    optimizer = keras.optimizers.Adam(learning_rate = 0.001)

    model.compile(loss = keras.losses.SparseCategoricalCrossentropy(from_logits=True),
                  optimizer=optimizer,
                  metrics = ['accuracy'])

    # Train the model. Validation uses val_ds; the previous code passed
    # train_ds, so the "val_*" metrics were computed on the training data.
    # NOTE(review): FrameGenerator derives label ids from the folders of each
    # root separately, so the train and val roots must contain the same class
    # folders for the validation metrics to be meaningful - confirm.
    history = model.fit(x = train_ds,
                        epochs = 50,
                        validation_data = val_ds)

    # Save the metrics only after training finished, so a failed run does not
    # leave behind an empty file that replaced earlier results.
    # NOTE(review): the file name says "200ep" although training runs for 50
    # epochs; the name is kept unchanged so existing readers still find it.
    with open(f'train_{num_classes}_classes_200ep.txt', 'w') as f:
        f.write(json.dumps(history.history))
        f.write('\n')