In [1]:
import cv2
import mediapipe as mp
import numpy as np
import os
import pandas as pd
from PIL import Image
import numpy as np
import pandas as pd

In [2]:
# Size of each extracted frame.
# NOTE(review): these two values are passed to cv2.resize as
# (IMAGE_HEIGHT, IMAGE_WIDTH), and the extracted frames come out as
# 240 rows x 320 columns, so the names look swapped relative to their
# effect - confirm before renaming.
IMAGE_HEIGHT = 320
IMAGE_WIDTH = 240

# Number of frames sampled from each video.
SEQUENCE_LENGTH = 20

# Actions to recognise. Each name is also the sub-directory of DATASET_DIR
# holding that class's videos, and its index in this list is the integer label.
CLASSES_LIST = [
    'Your', 'Love', 'Sleep', 'Name',
    'What', 'Read', 'I', 'Bye', 'You', 'Eat', 'Hello',
]

# Directory containing the training videos.
DATASET_DIR = 'PhanLoai'


In [3]:
# Short aliases for the MediaPipe solution modules used by frames_extraction.
mp_drawing = mp.solutions.drawing_utils          # draw_landmarks helper
mp_drawing_styles = mp.solutions.drawing_styles  # default landmark / connection styles
mp_pose = mp.solutions.pose                      # Pose model and POSE_CONNECTIONS
mp_hands = mp.solutions.hands                    # Hands model and HAND_CONNECTIONS

def _count_video_frames(video_path):
    '''Count the frames of a video by reading it to the end.'''
    video_reader = cv2.VideoCapture(video_path)
    try:
        video_frames_count = 0
        success, _ = video_reader.read()
        while success:
            video_frames_count += 1
            success, _ = video_reader.read()
        return video_frames_count
    finally:
        video_reader.release()


def frames_extraction(video_path):
    '''
    Sample frames from a video and turn each one into a landmark drawing.

    Up to SEQUENCE_LENGTH evenly spaced frames are read. For every sampled
    frame the MediaPipe hand and pose landmarks are detected and drawn on a
    blank canvas of the same size as the frame; the original pixels are not
    kept. The canvas is then resized and scaled by 1/255.

    Args:
        video_path: The path of the video in the disk, whose frames are to be extracted.
    Returns:
        frames_list: A list of arrays, one per sampled frame. The list is
            shorter than SEQUENCE_LENGTH (possibly empty) when the video
            cannot be opened or a read fails part-way through.
    '''

    video_frames_count = _count_video_frames(video_path)

    # Nothing readable: skip building the MediaPipe models altogether.
    if video_frames_count == 0:
        return []

    # Interval between sampled frames; at least 1 so short videos still advance.
    skip_frames_window = max(video_frames_count // SEQUENCE_LENGTH, 1)

    # Frames collected for this video.
    frames_list = []

    cap = cv2.VideoCapture(video_path)
    try:
        # NOTE(review): Hands is created with static_image_mode=True but Pose is
        # not, although the sampled frames are not consecutive - confirm that
        # this difference is intended.
        with mp_hands.Hands(
                static_image_mode=True,
                max_num_hands=2,
                min_detection_confidence=0.5) as hands:
            with mp_pose.Pose(
                    min_detection_confidence=0.5,
                    min_tracking_confidence=0.5) as pose:

                for frame_counter in range(SEQUENCE_LENGTH):
                    # Jump straight to the next sampled frame.
                    cap.set(cv2.CAP_PROP_POS_FRAMES, frame_counter * skip_frames_window)
                    success, image = cap.read()

                    if not success:
                        break

                    # OpenCV decodes frames as BGR while MediaPipe expects RGB.
                    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
                    hand_results = hands.process(image)
                    pose_results = pose.process(image)

                    # Canvas the landmarks are drawn on (filled with 1, not 0).
                    image2 = np.ones((image.shape[0], image.shape[1], 3), np.uint8)

                    # Draw the hand annotation on the canvas.
                    if hand_results.multi_hand_landmarks:
                        for hand_landmarks in hand_results.multi_hand_landmarks:
                            mp_drawing.draw_landmarks(
                                image2,
                                hand_landmarks,
                                mp_hands.HAND_CONNECTIONS,
                                mp_drawing_styles.get_default_hand_landmarks_style(),
                                mp_drawing_styles.get_default_hand_connections_style())

                    # Draw the pose annotation on the canvas.
                    mp_drawing.draw_landmarks(
                        image2,
                        pose_results.pose_landmarks,
                        mp_pose.POSE_CONNECTIONS,
                        landmark_drawing_spec=mp_drawing_styles.get_default_pose_landmarks_style())

                    # cv2.resize takes dsize as (width, height), so with the
                    # current constants the result has IMAGE_WIDTH rows and
                    # IMAGE_HEIGHT columns. Kept as-is to preserve the
                    # existing output shape.
                    resized_frame = cv2.resize(image2, (IMAGE_HEIGHT, IMAGE_WIDTH))
                    normalized_frame = resized_frame / 255
                    frames_list.append(normalized_frame)
    finally:
        # Release the VideoCapture object even if detection or drawing raises.
        cap.release()

    # Return the frames list.
    return frames_list


In [4]:
# Smoke test: run the extraction on a single sample video.
frames = frames_extraction('./video2242896.avi')
# Show the shape of the stacked result as a quick check.
np.shape(frames)

INFO: Created TensorFlow Lite XNNPACK delegate for CPU.


(20, 240, 320, 3)

In [5]:
# for frame in frames:
#     Image.fromarray((frame*255).astype(np.uint8), 'RGB').show()


In [6]:
def create_dataset():
    '''
    Build the dataset from the videos stored under DATASET_DIR.

    Every name in CLASSES_LIST is read as a sub-directory of DATASET_DIR and
    each file in it is passed to frames_extraction. Only videos that yield
    exactly SEQUENCE_LENGTH frames are kept; the rest are reported and skipped.

    Returns:
        features: numpy array built from the per-video frame lists.
        labels: numpy array holding, for each kept video, the index of its
            class in CLASSES_LIST.
        video_files_paths: list of the paths of the kept videos, in the same
            order as features and labels.
    Raises:
        OSError: if a class directory cannot be listed (e.g. it is missing).
    '''
    features = []
    labels = []
    video_files_paths = []

    for class_index, class_name in enumerate(CLASSES_LIST):

        # Display the name of the class whose data is being extracted.
        print(f'Extracting Data of Class: {class_name}')

        class_dir = os.path.join(DATASET_DIR, class_name)

        # os.listdir returns entries in arbitrary order; sort them so the
        # dataset order is the same on every run.
        for file_name in sorted(os.listdir(class_dir)):
            video_file_path = os.path.join(class_dir, file_name)

            try:
                frames = frames_extraction(video_file_path)
            except Exception as error:
                # Best effort: one bad file must not abort the whole run, but
                # it is reported, and KeyboardInterrupt is no longer swallowed.
                print(f'  Skipping {video_file_path}: {error!r}')
                continue

            # Ignore videos that yielded fewer frames than SEQUENCE_LENGTH.
            if len(frames) != SEQUENCE_LENGTH:
                print(f'  Skipping {video_file_path}: '
                      f'got {len(frames)} frames, expected {SEQUENCE_LENGTH}')
                continue

            features.append(frames)
            labels.append(class_index)
            video_files_paths.append(video_file_path)

    # Converting the lists to numpy arrays.
    features = np.asarray(features)
    labels = np.array(labels)

    # Return the frames, class index, and video file path.
    return features, labels, video_files_paths


In [7]:
# Build the full dataset; this processes every video of every class in CLASSES_LIST.
features, labels, video_files_paths = create_dataset()

Extracting Data of Class: Your
Extracting Data of Class: Love
Extracting Data of Class: Sleep
Extracting Data of Class: Name
Extracting Data of Class: What
Extracting Data of Class: Read
Extracting Data of Class: I
Extracting Data of Class: Bye
Extracting Data of Class: You
Extracting Data of Class: Eat
Extracting Data of Class: Hello


[mjpeg @ 0x559b9a15fb00] overread 8
[mjpeg @ 0x559b9a0d1040] overread 8


In [None]:
#import pickle
#with open('data_frame.pickle', 'wb') as f:
    #pickle.dump({'features': features.tolist(), 'labels': labels, 'video_files_paths': video_files_paths}, f, protocol=pickle.HIGHEST_PROTOCOL)


In [None]:
# Show a summary only: echoing the whole array would flood the notebook output.
features.shape, features.dtype