In [1]:
import os
import cv2
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.callbacks import ReduceLROnPlateau
from sklearn.preprocessing import LabelEncoder

from official.projects.movinet.modeling import movinet
from official.projects.movinet.modeling import movinet_model
from official.vision.configs import video_classification as base_cfg
from official.vision.serving import export_saved_model_lib


ModuleNotFoundError: No module named 'official'

In [None]:
IMG_SIZE = 224    # side length, in pixels, that every frame is resized to
NUM_FRAMES = 30   # fixed number of frames returned for every video


def load_video(path):
    """Decode the first ``NUM_FRAMES`` frames of a video into a float32 clip.

    Every decoded frame is resized to ``IMG_SIZE x IMG_SIZE``, converted from
    BGR to RGB channel order and divided by 255. When fewer than ``NUM_FRAMES``
    frames can be read, the remaining slots stay all-zero; a file from which
    no frame can be read therefore yields an all-zero clip.

    Args:
        path: Path of the video file, passed to ``cv2.VideoCapture``.

    Returns:
        ``np.ndarray`` of shape ``(NUM_FRAMES, IMG_SIZE, IMG_SIZE, 3)`` and
        dtype ``float32``.
    """
    # Starting from zeros makes the end-padding implicit and avoids building a
    # Python list of frames that is copied again afterwards.
    clip = np.zeros((NUM_FRAMES, IMG_SIZE, IMG_SIZE, 3), dtype=np.float32)

    cap = cv2.VideoCapture(path)
    try:
        for index in range(NUM_FRAMES):
            ret, frame = cap.read()
            if not ret:
                # End of stream (or unreadable file): keep the zero padding.
                break
            frame = cv2.resize(frame, (IMG_SIZE, IMG_SIZE))
            clip[index] = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    finally:
        # Release the capture handle even if resizing/conversion raises.
        cap.release()

    clip /= 255.0
    return clip


In [None]:
def load_dataset(data_dir):
    """Load every video below ``data_dir`` together with its class label.

    ``data_dir`` is scanned for sub-directories; each sub-directory name is a
    class label and every ``.mp4`` / ``.avi`` file inside it (extension matched
    case-insensitively) is decoded with ``load_video``.

    Args:
        data_dir: Directory containing one sub-directory per class.

    Returns:
        A ``(videos, labels, class_names)`` tuple where ``videos`` is the
        stacked array of clips returned by ``load_video``, ``labels`` is an
        array holding the class-directory name of each clip (same order), and
        ``class_names`` is the sorted list of class-directory names.
    """
    video_suffixes = ('.mp4', '.avi')

    # Only directories are classes. Stray files next to them (e.g. .DS_Store)
    # would otherwise crash os.listdir() below and inflate the class count.
    class_names = sorted(
        entry for entry in os.listdir(data_dir)
        if os.path.isdir(os.path.join(data_dir, entry))
    )

    videos = []
    labels = []
    for label in class_names:
        label_dir = os.path.join(data_dir, label)
        # os.listdir() order is arbitrary; sort so sample order is reproducible.
        for file in sorted(os.listdir(label_dir)):
            if file.lower().endswith(video_suffixes):
                videos.append(load_video(os.path.join(label_dir, file)))
                labels.append(label)

    return np.array(videos), np.array(labels), class_names


In [None]:
BATCH_SIZE = 4  # clips per batch

train_videos, train_labels, class_names = load_dataset('../Dataset/split_data/train')
test_videos, test_labels, _ = load_dataset('../Dataset/split_data/test')

# Fail early with a readable message instead of an opaque tf.data/Keras error.
assert len(train_videos) > 0, 'no training videos found in ../Dataset/split_data/train'

# Map class-name strings to integer ids. The encoder is fitted on the training
# labels only and then reused for the test labels so both share one mapping.
le = LabelEncoder()
train_labels = le.fit_transform(train_labels)
test_labels = le.transform(test_labels)

# load_dataset() returns the samples grouped class by class, so the shuffle
# buffer has to span the whole training set; a smaller buffer would leave
# consecutive batches dominated by a single class.
train_ds = (
    tf.data.Dataset.from_tensor_slices((train_videos, train_labels))
    .shuffle(buffer_size=len(train_videos))
    .batch(BATCH_SIZE)
    .prefetch(tf.data.AUTOTUNE)
)
test_ds = (
    tf.data.Dataset.from_tensor_slices((test_videos, test_labels))
    .batch(BATCH_SIZE)
    .prefetch(tf.data.AUTOTUNE)
)


In [None]:
# MoViNet-A3 backbone used as a feature extractor. No checkpoint is restored
# in this notebook, so the backbone starts from its initial weights.
# output_states=True makes the call below return an (endpoints, states) pair.
backbone = movinet.Movinet(model_id='a3', causal=False, output_states=True)

inputs = tf.keras.Input(shape=(NUM_FRAMES, IMG_SIZE, IMG_SIZE, 3))

# The backbone returns a dict of named endpoints rather than a single tensor;
# the states are not needed for a non-causal model and are discarded.
endpoints, _ = backbone(inputs)

# NOTE(review): 'head' is the endpoint MovinetClassifier itself consumes; it is
# presumably already globally pooled - confirm its shape suits the Conv3D stack
# that is applied to `features` afterwards.
features = endpoints['head']


In [None]:
# Classification head stacked on the backbone features: two Conv3D + batch-norm
# stages (512 then 256 filters), global average pooling, and a softmax layer
# with one unit per class.
x = features
for n_filters in (512, 256):
    x = layers.Conv3D(
        n_filters,
        kernel_size=(3, 3, 3),
        padding='same',
        activation='relu',
    )(x)
    x = layers.BatchNormalization()(x)
x = layers.GlobalAveragePooling3D()(x)

output = layers.Dense(units=len(class_names), activation='softmax')(x)

# End-to-end model from raw clips to class probabilities.
model = tf.keras.Model(inputs=inputs, outputs=output)
model.summary()


In [None]:
EPOCHS = 3  # number of passes over the training set

# Labels are integer class ids, hence the sparse variant of cross-entropy.
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

# Halve the learning rate when val_loss stops improving. patience must stay
# below EPOCHS - 1 for the callback to have any effect: with patience=3 and
# only 3 epochs the reduction could never trigger.
lr_callback = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=1)

# NOTE(review): the test split doubles as validation data here, so the LR
# schedule is tuned on the same data that is reported as held-out - consider a
# separate validation split.
history = model.fit(
    train_ds,
    validation_data=test_ds,
    epochs=EPOCHS,
    callbacks=[lr_callback],
)

# NOTE(review): HDF5 saving of a model containing MoViNet custom layers may
# need custom_objects at load time - confirm the file can be reloaded.
model.save("movinet_football_classifier.h5")
