In [None]:
from tensorflow.keras.applications import EfficientNetV2S
from tensorflow.keras.applications import VGG16
from tensorflow.keras.layers import Input, TimeDistributed, Dense, LSTM, Dropout, Bidirectional, GlobalAveragePooling2D, ConvLSTM2D
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l2

def create_model(sequence_length: int, frame_shape: tuple, num_classes: int) -> Model:
    """Build a per-frame video classifier on top of a frozen EfficientNetV2S.

    Pipeline: every frame is encoded by an ImageNet-pretrained
    EfficientNetV2S (no classification head, all weights frozen), the
    resulting feature maps are passed through a ConvLSTM2D, spatially
    average-pooled per time step, fed to a bidirectional LSTM, and finally
    classified frame by frame with a softmax layer.

    Args:
        sequence_length: Number of frames in each input clip.
        frame_shape: Shape of a single frame as a tuple, e.g.
            ``(height, width, channels)``. It is concatenated with
            ``(sequence_length,)`` to form the model input shape, so it
            must be a tuple.
        num_classes: Number of output classes predicted for each frame.

    Returns:
        An uncompiled Keras ``Model`` mapping a clip of shape
        ``(sequence_length,) + frame_shape`` to per-frame class
        probabilities of shape ``(sequence_length, num_classes)``
        (batch dimension omitted).
    """
    video_input = Input(shape=(sequence_length,) + frame_shape)

    # Frozen ImageNet backbone used purely as a per-frame feature extractor.
    # NOTE(review): Keras' EfficientNetV2 models include input rescaling by
    # default and expect raw [0, 255] pixel values -- confirm the data
    # generators do not rescale frames a second time.
    base_model = EfficientNetV2S(include_top=False, input_shape=frame_shape, weights='imagenet')
    # Freezing the model freezes every nested layer in a single step.
    base_model.trainable = False

    # Apply the same backbone independently to each frame of the clip.
    encoded_frames = TimeDistributed(base_model)(video_input)

    # Spatio-temporal modelling on the backbone feature maps; 'same' padding
    # keeps the spatial size and return_sequences keeps one map per frame.
    convlstm_layer = ConvLSTM2D(filters=64, kernel_size=(3, 3), padding='same', return_sequences=True, data_format='channels_last')(encoded_frames)
    # Collapse the spatial dimensions so each time step becomes a 64-d vector.
    convlstm_layer = TimeDistributed(GlobalAveragePooling2D())(convlstm_layer)

    # Temporal context in both directions; the sequence is kept so that a
    # prediction can be produced for every frame.
    sequence_encoded = Bidirectional(LSTM(256, return_sequences=True, kernel_regularizer=l2(0.001)))(convlstm_layer)
    sequence_encoded = Dropout(0.5)(sequence_encoded)

    # Per-frame class probabilities.
    frame_predictions = TimeDistributed(Dense(num_classes, activation='softmax', kernel_regularizer=l2(0.001)))(sequence_encoded)

    model = Model(inputs=video_input, outputs=frame_predictions)
    return model

# Clip geometry and label space used to build the network.
sequence_length = 16          # frames per clip
frame_shape = (224, 224, 3)   # shape of a single frame
num_classes = 5               # classes predicted for every frame

# Build the network, then attach optimizer, loss and metric.
model = create_model(
    sequence_length=sequence_length,
    frame_shape=frame_shape,
    num_classes=num_classes,
)
model.compile(
    optimizer=Adam(learning_rate=6e-5),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Print the layer-by-layer overview of the compiled model.
model.summary()

In [None]:
# Train for 200 epochs and keep the returned History object for later
# inspection.
# NOTE(review): validation uses `test_generator`; confirm this is intended
# and that a separate held-out set is used for final evaluation.
history = model.fit(x=train_generator, epochs=200, validation_data=test_generator)

# Then save the model, run predictions, perform error analysis, etc.