In [9]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import cv2 
import sklearn
from sklearn.model_selection import train_test_split

In [10]:
import os
# os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

In [11]:
# Report how many GPUs TensorFlow can see, followed by the TensorFlow version.
gpu_devices = tf.config.list_physical_devices('GPU')
print("Num GPUs Available: ", len(gpu_devices))
print(tf.__version__)

Num GPUs Available:  1
2.10.0


In [12]:
# tf.test.is_gpu_available() is deprecated in TensorFlow 2.x; the documented
# replacement is to ask tf.config for the list of physical GPU devices.
len(tf.config.list_physical_devices('GPU')) > 0

True

In [13]:
def define_model_vgg16_lstm(num_classes=2, frame_shape=(224, 224, 3),
                            lstm_units=128, dense_units=128):
    """Build, compile and summarize a VGG16 + stacked-LSTM video classifier.

    Every frame of the input sequence is passed through an ImageNet-initialised
    VGG16 (no top, global average pooling), the per-frame feature vectors are
    fed to two LSTM layers, and a small dense head produces class probabilities.

    Args:
        num_classes: Size of the final softmax layer.
        frame_shape: Shape of one frame as (height, width, channels). The time
            dimension is left as ``None`` so sequences may have any length.
        lstm_units: Number of units in each of the two LSTM layers.
        dense_units: Number of units in the hidden dense layer.

    Returns:
        The compiled ``tf.keras.Sequential`` model. Labels are expected as
        integer class ids because the loss is sparse categorical cross-entropy.

    NOTE(review): the ImageNet weights are normally used together with
    ``tf.keras.applications.vgg16.preprocess_input``; this model applies no
    such step itself, so confirm that the data pipeline does it.
    NOTE(review): the backbone is left fully trainable (nothing is frozen), so
    all VGG16 weights are updated with Adam's default learning rate - confirm
    that this is intended.
    """
    # Feature extractor applied independently to each frame.
    backbone = tf.keras.applications.vgg16.VGG16(
        include_top=False, weights='imagenet', pooling='avg')

    model = tf.keras.models.Sequential()
    model.add(tf.keras.layers.TimeDistributed(
        backbone, input_shape=(None, *frame_shape)))
    # First LSTM returns the whole sequence so the second one can consume it.
    model.add(tf.keras.layers.LSTM(lstm_units, return_sequences=True))
    model.add(tf.keras.layers.LSTM(lstm_units))
    model.add(tf.keras.layers.Dense(dense_units, activation='relu'))
    model.add(tf.keras.layers.Dense(num_classes, activation='softmax'))

    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    model.summary()
    return model

def define_model_cnn_lstm(num_classes=2, frame_shape=(224, 224, 3),
                          lstm_units=128, dense_units=128):
    """Build, compile and summarize a small CNN + stacked-LSTM video classifier.

    Each frame goes through three Conv2D(64, 3x3, relu) + MaxPooling2D(2x2)
    stages and is flattened; the per-frame vectors are fed to two LSTM layers
    followed by a dense head that produces class probabilities.

    Args:
        num_classes: Size of the final softmax layer.
        frame_shape: Shape of one frame as (height, width, channels). The time
            dimension is left as ``None`` so sequences may have any length.
        lstm_units: Number of units in each of the two LSTM layers.
        dense_units: Number of units in the hidden dense layer.

    Returns:
        The compiled ``tf.keras.Sequential`` model. Labels are expected as
        integer class ids because the loss is sparse categorical cross-entropy.
    """
    layers = tf.keras.layers
    model = tf.keras.models.Sequential()

    # Three identical conv/pool stages; only the first layer declares the input shape.
    for stage in range(3):
        first_layer_kwargs = {'input_shape': (None, *frame_shape)} if stage == 0 else {}
        model.add(layers.TimeDistributed(
            layers.Conv2D(64, (3, 3), activation='relu'), **first_layer_kwargs))
        model.add(layers.TimeDistributed(layers.MaxPooling2D((2, 2))))

    # Collapse each frame's feature map into one vector per time step.
    model.add(layers.TimeDistributed(layers.Flatten()))
    # First LSTM returns the whole sequence so the second one can consume it.
    model.add(layers.LSTM(lstm_units, return_sequences=True))
    model.add(layers.LSTM(lstm_units))
    model.add(layers.Dense(dense_units, activation='relu'))
    model.add(layers.Dense(num_classes, activation='softmax'))

    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    model.summary()
    return model

In [14]:
from sklearn.preprocessing import LabelEncoder

# Create a LabelEncoder object; it is fitted on the training labels in the
# training cell below and reused to transform the test labels.
label_encoder = LabelEncoder()


In [15]:
# 27430 video 10s 
def load_data(video_path='./data/test/', annotation_path='./data/label_2.csv'):
    """Read the annotation CSV and build the list of video file paths.

    The CSV must contain a ``name`` column (video file name without the
    ``.mp4`` extension) and a ``label`` column.

    Args:
        video_path: Directory that holds the ``.mp4`` files.
        annotation_path: Path of the annotation CSV file.

    Returns:
        A tuple ``(video, label)`` where ``video`` is a list of file paths
        (one per CSV row, in row order) and ``label`` is the NumPy array of the
        ``label`` column.
    """
    annotation_read = pd.read_csv(annotation_path)
    label = annotation_read['label'].values
    # Names are formatted explicitly so purely numeric names (parsed by pandas
    # as integers) do not break the string concatenation.
    video = [os.path.join(video_path, f'{name}.mp4')
             for name in annotation_read['name'].values]
    return video, label

def load_video(video_path, frame_step=10, size=(224, 224)):
    """Decode a video file and return every ``frame_step``-th frame as RGB.

    Args:
        video_path: Path of the video file to read.
        frame_step: Keep one frame out of every ``frame_step`` frames read
            (the 10th, 20th, ... frame for the default of 10).
        size: Target size passed to ``cv2.resize`` as (width, height).

    Returns:
        ``np.array`` of the kept frames. When no frame could be read (missing
        or unreadable file, or a video shorter than ``frame_step`` frames) a
        warning is printed and an empty array is returned.

    Raises:
        ValueError: If ``frame_step`` is smaller than 1.
    """
    if frame_step < 1:
        raise ValueError(f"frame_step must be >= 1, got {frame_step}")

    cap = cv2.VideoCapture(video_path)
    frames = []
    cnt = 0
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                # Stop at the first failed read instead of polling the
                # exhausted stream until the counter reaches a multiple of
                # frame_step.
                break
            cnt += 1
            if cnt % frame_step == 0:
                frame = cv2.resize(frame, size)
                # OpenCV decodes to BGR; convert to RGB channel order.
                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                frames.append(frame)
    finally:
        # Always free the decoder, even if resizing/conversion raised.
        cap.release()

    if len(frames) == 0:
        print(f"Warning: no frames could be read from {video_path!r}")
    return np.array(frames)

class DataGenerator(tf.keras.utils.Sequence):
    """Keras ``Sequence`` that loads videos from disk one batch at a time.

    Each item is a tuple ``(videos, labels)`` where ``videos`` stacks the
    frame arrays returned by ``load_video`` for the batch and ``labels`` holds
    the matching entries of ``labels``.

    With ``batch_size > 1`` every video in a batch must yield the same number
    of frames, otherwise the frame arrays cannot be stacked into one array.

    Args:
        list_IDs: Sequence of video file paths.
        labels: Sequence of labels, aligned with ``list_IDs``.
        batch_size: Number of videos per batch.
        dim: Stored on the instance; not used by the generator itself.
        n_channels: Stored on the instance; not used by the generator itself.
        n_classes: Stored on the instance; not used by the generator itself.
        shuffle: When true, the sample order is reshuffled at construction and
            after every epoch.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1 or ``list_IDs`` and
            ``labels`` have different lengths.
    """
    def __init__(self, list_IDs, labels, batch_size=1, dim=(224,224), n_channels=3,
                 n_classes=400, shuffle=True):
        """Store the configuration and prepare the sample order."""
        if batch_size < 1:
            raise ValueError(f"batch_size must be >= 1, got {batch_size}")
        if len(list_IDs) != len(labels):
            raise ValueError(
                f"list_IDs and labels must have the same length, "
                f"got {len(list_IDs)} and {len(labels)}")
        self.dim = dim
        self.batch_size = batch_size
        self.labels = labels
        self.list_IDs = list_IDs
        self.n_channels = n_channels
        self.n_classes = n_classes
        self.shuffle = shuffle
        # Positions into list_IDs/labels; shuffling this keeps both aligned.
        self.indexes = np.arange(len(self.list_IDs))
        self.on_epoch_end()

    def __len__(self):
        """Return the number of batches per epoch (the last one may be partial)."""
        return int(np.ceil(len(self.list_IDs) / self.batch_size))

    def __getitem__(self, index):
        """Load and return batch number ``index`` as ``(videos, labels)``."""
        batch_indexes = self.indexes[index * self.batch_size:(index + 1) * self.batch_size]

        train_video = [load_video(self.list_IDs[k]) for k in batch_indexes]
        train_label = [self.labels[k] for k in batch_indexes]
        return np.array(train_video), np.array(train_label)

    def on_epoch_end(self):
        """Reshuffle the sample order after each epoch when ``shuffle`` is set."""
        if self.shuffle:
            np.random.shuffle(self.indexes)


In [16]:
# Load the video paths with their labels and hold out 40% for evaluation.
# A fixed random_state makes the split (and hence the results) repeatable.
video, label = load_data()
video_train, video_test, label_train, label_test = train_test_split(
    video, label, test_size=0.4, random_state=42)

# Fit the encoder on the training labels only, then reuse the same mapping
# for the held-out labels.
label_train = label_encoder.fit_transform(label_train)
label_test = label_encoder.transform(label_test)

# One video per batch; the evaluation generator does not need shuffling.
data_gen_train = DataGenerator(video_train, label_train, batch_size=1)
data_gen_test = DataGenerator(video_test, label_test, batch_size=1, shuffle=False)

model = define_model_vgg16_lstm()

# The batch size is defined by the generator: batch_size must not be passed to
# fit() when the input is a keras.utils.Sequence. Validation during training
# is disabled; pass validation_data=data_gen_test to enable it.
model.fit(data_gen_train, epochs=5)

# Make sure the output directory exists before writing the HDF5 file.
os.makedirs('./model', exist_ok=True)
model.save('./model/model.h5')

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 time_distributed_1 (TimeDis  (None, None, 512)        14714688  
 tributed)                                                       
                                                                 
 lstm_2 (LSTM)               (None, None, 128)         328192    
                                                                 
 lstm_3 (LSTM)               (None, 128)               131584    
                                                                 
 dense_2 (Dense)             (None, 128)               16512     
                                                                 
 dense_3 (Dense)             (None, 2)                 258       
                                                                 
Total params: 15,191,234
Trainable params: 15,191,234
Non-trainable params: 0
__________________________________________

In [17]:
# Evaluate on the held-out generator and show each metric next to its name
# (the raw evaluate() result was previously discarded).
eval_metrics = model.evaluate(data_gen_test, return_dict=True)
print(eval_metrics)

