# 1. Import and Install Dependencies

In [1]:
!pip install opencv-python mediapipe scikit-learn matplotlib





In [1]:
import cv2
import numpy as np
import os
from matplotlib import pyplot as plt
import time
import mediapipe as mp

# 2. Keypoints using MP Holistic

In [2]:
mp_holistic = mp.solutions.holistic # Holistic model
mp_drawing = mp.solutions.drawing_utils # Drawing utilities

In [3]:
def mediapipe_detection(image, model): 
    #image = feed frame
    #model = Holistic model
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) # COLOR CONVERSION BGR -> RGB
    image.flags.writeable = False                  # Image is no longer writeable
    results = model.process(image)                 # Make prediction
    image.flags.writeable = True                   # Image is now writeable 
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR) # COLOR COVERSION RGB -> BGR
    return image, results

In [4]:
#connection_drawing_spec 커스텀 (optional)
def draw_styled_landmarks(image, results):
    #Draw pose connections
    mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS,
                             mp_drawing.DrawingSpec(color=(80,22,10), thickness=2, circle_radius=4), 
                             mp_drawing.DrawingSpec(color=(80,44,121), thickness=2, circle_radius=2)
                             ) 
    # Draw left hand connections
    mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS, 
                             mp_drawing.DrawingSpec(color=(121,22,76), thickness=2, circle_radius=4), 
                             mp_drawing.DrawingSpec(color=(121,44,250), thickness=2, circle_radius=2)
                             ) 
    # Draw right hand connections  
    mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS, 
                             mp_drawing.DrawingSpec(color=(245,117,66), thickness=2, circle_radius=4), 
                             mp_drawing.DrawingSpec(color=(245,66,230), thickness=2, circle_radius=2)
                             ) 

In [5]:
#------------------------------------custom function 실행------------------------------------

In [19]:
def draw_custom_pose_landmarks(image, results):
    
    pose_landmark_subset = landmark_pb2.NormalizedLandmarkList(
      landmark = [
          #eye keypoints
          results.pose_landmarks.landmark[mp_pose.PoseLandmark.LEFT_EYE],#0 
          results.pose_landmarks.landmark[mp_pose.PoseLandmark.RIGHT_EYE],#1
          #right arm keypoints
          results.pose_landmarks.landmark[mp_pose.PoseLandmark.RIGHT_WRIST],#2
          results.pose_landmarks.landmark[mp_pose.PoseLandmark.RIGHT_ELBOW],#3
          results.pose_landmarks.landmark[mp_pose.PoseLandmark.RIGHT_SHOULDER],#4 
          #left arm keypoints
          results.pose_landmarks.landmark[mp_pose.PoseLandmark.LEFT_WRIST],#5 
          results.pose_landmarks.landmark[mp_pose.PoseLandmark.LEFT_ELBOW],#6
          results.pose_landmarks.landmark[mp_pose.PoseLandmark.LEFT_SHOULDER],#7
          #body keypoints
          results.pose_landmarks.landmark[mp_pose.PoseLandmark.LEFT_HIP],#8
          results.pose_landmarks.landmark[mp_pose.PoseLandmark.RIGHT_HIP]])#9
    
    #connection = [pose_landmark_subset index1, pose_landmark_subset index2]
    pose_landmark_subset_connections = [
        [2, 3], [3, 4], #right arm
        [5, 6], [6, 7], #left arm
        [4, 7], [4, 9], [8, 9], [7, 8]] #body

    #Draw pose connections
    mp_drawing.draw_landmarks(image, 
                             landmark_list = pose_landmark_subset, 
                             connections = pose_landmark_subset_connections,
                             landmark_drawing_spec = mp_drawing.DrawingSpec(color=(98,129,205), thickness=2, circle_radius=2), 
                             connection_drawing_spec = mp_drawing.DrawingSpec(color=(122,160,255), thickness=2, circle_radius=2)
                             ) 

In [20]:
def draw_custom_hand_landmarks(image, results):
    # Draw left hand connections
    mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS, 
                             mp_drawing.DrawingSpec(color=(149,140,205), thickness=2, circle_radius=2), 
                             mp_drawing.DrawingSpec(color=(185,174,255), thickness=2, circle_radius=2)
                             ) 
    # Draw right hand connections  
    mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS, 
                             mp_drawing.DrawingSpec(color=(205,182,141), thickness=2, circle_radius=2), 
                             mp_drawing.DrawingSpec(color=(255,226,176), thickness=2, circle_radius=2)
                             ) 

In [21]:
def draw_custom_face_landmarks(image, results):
    #face landmark subset
    face_landmark_subset = landmark_pb2.NormalizedLandmarkList(
          landmark = [
              #right eyebrow keypoints
              results.face_landmarks.landmark[46],#0 
              results.face_landmarks.landmark[53],#1
              results.face_landmarks.landmark[52],#2
              results.face_landmarks.landmark[65],#3
              results.face_landmarks.landmark[55],#4

              #left eyebrow keypoints
              results.face_landmarks.landmark[285],#5 
              results.face_landmarks.landmark[295],#6
              results.face_landmarks.landmark[282],#7
              results.face_landmarks.landmark[283],#8
              results.face_landmarks.landmark[276],#9

              #lip keypoints
              results.face_landmarks.landmark[61],#10 
              results.face_landmarks.landmark[81],#11
              results.face_landmarks.landmark[13],#12 
              results.face_landmarks.landmark[311],#13
              results.face_landmarks.landmark[291],#14
              results.face_landmarks.landmark[402],#15 
              results.face_landmarks.landmark[14],#16
              results.face_landmarks.landmark[178]])#17
    
    #connection = {face_landmark_subset index1, face_landmark_subset index2}
    face_landmark_subset_connections = [
        [0, 1], [1, 2], [2, 3], [3, 4], #right eybrow
        [5, 6], [6, 7], [7, 8], [8, 9], #left eyebrow
        [10, 11],[11, 12], [12, 13], [13, 14], [14, 15], [15, 16], [16, 17], [17, 10]] #lip
    
    mp_drawing.draw_landmarks(image, 
                             face_landmark_subset, 
                             face_landmark_subset_connections,
                             mp_drawing.DrawingSpec(color=(112,190,205), thickness=2, circle_radius=4), 
                             mp_drawing.DrawingSpec(color=(139,236,255), thickness=2, circle_radius=2)
                             ) 

# 3. Extract Keypoint Values
results의 landmarks값 -> numpy array로 변환하기

In [63]:
poseIndex = np.array([22, 16, 14, 12, 24, 21, 15, 13, 11, 23])
faceIndex = np.array([61, 291, 81, 13, 311, 178, 14, 402, 285, 295, 282, 283, 276, 46, 53, 52, 65, 55])

def extract_keypoints_220(results):
    # result의 landmarks의 모든 key point values -> 하나의 numpy array 로 flatten
    #if landmarks has no value, fill numpy array with zero
    
    if results.pose_landmarks: #pose landmarks
        #pose = np.array([[res.x, res.y, res.z, res.visibility] for res in results.pose_landmarks.landmark]).flatten()
        i = 0
        pose = np.array([])
        for res in results.pose_landmarks.landmark:
            if i in poseIndex:
                pose = np.append(pose, np.array([res.x, res.y, res.z, res.visibility]))
            i = i + 1
    else:
        pose = np.zeros(132) #33*4

    if results.left_hand_landmarks: #left hand landmarks
        lh = np.array([[res.x, res.y, res.z] for res in results.left_hand_landmarks.landmark]).flatten() 
    else:
        lh = np.zeros(63) #21*3

    if results.right_hand_landmarks: #right hand landmarks
        rh = np.array([[res.x, res.y, res.z] for res in results.right_hand_landmarks.landmark]).flatten() 
    else:
        rh = np.zeros(63) #21*3
        
    #face landmarks
    if results.face_landmarks:
        #face = np.array([[res.x, res.y, res.z] for res in results.face_landmarks.landmark]).flatten()
        i = 0
        face = np.array([])
        for res in results.face_landmarks.landmark:
            if i in faceIndex:
                face = np.append(face, np.array([res.x, res.y, res.z]))
            i = i + 1
    else:
        face = np.zeros(54) #18*3
    
    
    #return np.concatenate([pose, lh, rh])
    return np.concatenate([pose, lh, rh, face])

In [64]:
def extract_keypoints_258(results):
    # result의 landmarks의 모든 key point values -> 하나의 numpy array 로 flatten
    #if landmarks has no value, fill numpy array with zero
    
    if results.pose_landmarks: #pose landmarks
        pose = np.array([[res.x, res.y, res.z, res.visibility] for res in results.pose_landmarks.landmark]).flatten() 
    else:
        pose = np.zeros(132) #33*4

    if results.left_hand_landmarks: #left hand landmarks
        lh = np.array([[res.x, res.y, res.z] for res in results.left_hand_landmarks.landmark]).flatten() 
    else:
        lh = np.zeros(63) #21*3

    if results.right_hand_landmarks: #right hand landmarks
        rh = np.array([[res.x, res.y, res.z] for res in results.right_hand_landmarks.landmark]).flatten() 
    else:
        rh = np.zeros(63) #21*3
    
    
    #return np.concatenate([pose, lh, rh])
    return np.concatenate([pose, lh, rh])

In [65]:
def extract_keypoints_1662(results):
    # result의 landmarks의 모든 key point values -> 하나의 numpy array 로 flatten
    #if landmarks has no value, fill numpy array with zero
    
    if results.pose_landmarks: #pose landmarks
        pose = np.array([[res.x, res.y, res.z, res.visibility] for res in results.pose_landmarks.landmark]).flatten() 
    else:
        pose = np.zeros(132) #33*4

    if results.left_hand_landmarks: #left hand landmarks
        lh = np.array([[res.x, res.y, res.z] for res in results.left_hand_landmarks.landmark]).flatten() 
    else:
        lh = np.zeros(63) #21*3

    if results.right_hand_landmarks: #right hand landmarks
        rh = np.array([[res.x, res.y, res.z] for res in results.right_hand_landmarks.landmark]).flatten() 
    else:
        rh = np.zeros(63) #21*3
    
    #face landmarks
    if results.face_landmarks:
        face = np.array([[res.x, res.y, res.z] for res in results.face_landmarks.landmark]).flatten() 
    else:
        face = np.zeros(1404) #468*3
    
    #return np.concatenate([pose, lh, rh])
    return np.concatenate([pose, lh, rh, face])

# 4. Setup Folders for Collection

In [11]:
#pandas
!pip install xlrd
!pip install openpyxl
!pip install pandas
import pandas as pd

^C












In [66]:
import pandas as pd

In [67]:
# 파일명
file_name = '45.xlsx'

# Daraframe형식으로 엑셀 파일 읽기
df = pd.read_excel(file_name, sheet_name='Sheet1')

list_res = df.loc[:,'단어'].to_list()
print(list_res)

['가슴', '개', '귀', '내일', '누나', '다리', '동생', '뒤', '딸', '머리', '목', '물', '발', '배', '불', '뼈', '선생님', '손', '아기', '아내', '아들', '아빠', '앞', '어제', '엄마', '옆쪽', '오늘', '오른쪽', '왼쪽', '위', '친구', '코', '팔', '학교', 1, 2, 3, 4, 5, 6, 7, 8, 9, 0]


In [68]:
# Path for exported data, numpy arrays
DATA_PATH = os.path.join('datas') 

#num of videos
no_sequences = 100

#actions = np.array(['가스', '가슴', '갇히다'])
actions = np.array(list_res)

# 1 Video = 50 frames
sequence_length = 30

In [69]:
len(actions)

44

# 5. Collect Keypoint Values for Training and Testing

In [70]:
##extract keypoints from video
def frame_capture(action, sequence, video):
    cap = cv2.VideoCapture(video)
    
    # Set mediapipe model 
    with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
        frame_num = 0
        while cap.isOpened():
            # Read feed
            ret, frame = cap.read()
            if ret == False:
                break
                
            # Make detections
            image, results = mediapipe_detection(frame, holistic)  
            # Draw landmarks
            #draw_styled_landmarks(image, results)
            draw_custom_pose_landmarks(image, results)
            draw_custom_hand_landmarks(image, results)
            draw_custom_face_landmarks(image, results)

            # Show to screen
            cv2.imshow('OpenCV Video', image)

            # export keypoints
            keypoints = extract_keypoints(results) #results of frame -> numpy array
            npy_path = os.path.join(DATA_PATH, action, str(sequence), str(frame_num))
            np.save(npy_path, keypoints) #save numpy array in directory
            
            #increase frame_num
            frame_num = frame_num + 1
                
            # Break gracefully
            if cv2.waitKey(10) & 0xFF == ord('q'):
                break
                
        cap.release()
        cv2.destroyAllWindows()

# 6. Preprocess Data and Create Labels and Features

In [71]:
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical

In [72]:
label_map = {label:num for num, label in enumerate(actions)}

In [73]:
def GetKeypoint(src):
    poseIndex = np.array([22, 16, 14, 12, 24, 21, 15, 13, 11, 23])
    faceIndex = np.array([61, 291, 81, 13, 311, 178, 14, 402, 285, 295, 282, 283, 276, 46, 53, 52, 65, 55])
    result = np.array([])
    #for p in poseIndex:
    #    for i in range(0,4):
    #        result = np.append(result, src[4*p + i])
    #        
    #result = np.append(result, src[133:133+126])
    result = np.append(result, src[0:258])
    
    for f in faceIndex:
        for i in range(0,3):
            result = np.append(result, src[258 + 3*f + i])
    
    return result

In [74]:
# ------------------------양손, 양손 + 포즈, 양손 + 포즈 + 얼굴 ------------------------

import fnmatch #폴더 안에 있는 npy 개수 계산하기 위함

#create two blank arrays
sequences, labels = [], [] 
#sequences(=videos) = feature data = x data 
#labels = label data = y data

for action in actions: 
    for sequence in range(no_sequences): #each action has 60 sequences(=videos)
        window = []
        
        #폴더 안에 있는 npy 개수(= frame 수) 계산
        file_count = len(fnmatch.filter(os.listdir(os.path.join(DATA_PATH, action, str(sequence))), '*.npy'))
        
        #저장된 npy 수가 원하는 sequence_length보다 같거나 많을 경우
        if(file_count >= sequence_length):  
            #앞에 있는 npy 버리고, start부터 끝까지 sequence_length개의 npy 가져오기
            start = (file_count - sequence_length)//2
            for frame_num in range(sequence_length):
                res = np.load(os.path.join(DATA_PATH, action, str(sequence), "{}.npy".format(start + frame_num)))
                #res_tmp = res[132:258]     # 양손만
                #res_tmp = GetKeypoint(res)  # 포즈 + 손 + 일부 얼굴
                res_tmp = res[0:258]    # 포즈 + 양손

                window.append(res_tmp)
        #저장된 npy 수가 원하는 sequence_length보다 적을 경우
        else:
            #앞에 0으로 채운 npy 추가하기
            for frame_num in range(sequence_length - file_count):
                res = np.zeros(258) #132 + 63 + 63
                window.append(res)
            for frame_num in range(file_count):
                res = np.load(os.path.join(DATA_PATH, action, str(sequence), "{}.npy".format(frame_num)))
                window.append(res)
                              
        #map label
        sequences.append(window)
        labels.append(label_map[action])

In [75]:
X = np.array(sequences)

In [76]:
X.shape

(4400, 30, 258)

In [77]:
y = to_categorical(labels).astype(int) 
#converted label from int to binary array
#1 -> [1,0,0], 2 -> [0,1,0], 3 -> [0,0,1]

In [97]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1)
#split x,y data into train data and test data
#train data 95%, test data size 5% 

# 7. Build and Train LSTM Neural Network

In [98]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.callbacks import TensorBoard 

In [99]:
log_dir = os.path.join('Logs')
tb_callback = TensorBoard(log_dir=log_dir)
#tensor board is used to monitor neural network training and it's accuracy

In [100]:
import tensorflow as tf
from tensorflow.keras.layers import Add, Input, Embedding, GlobalAveragePooling1D, Conv1D, ReLU, MaxPooling1D, LSTM, Dense, Bidirectional, Dropout, BatchNormalization
from tensorflow.keras.models import Sequential, Model

#Residual Block 구현
def residual_block(x, filters, kernel_size=3, stride=1):
    # Shortcut Connection
    shortcut = x
    
    # Main Path
    x = Conv1D(filters, kernel_size=kernel_size, strides=stride, padding='same')(x)
    x = BatchNormalization()(x)
    x = ReLU()(x)
    
    x = Conv1D(filters, kernel_size=kernel_size, strides=stride, padding='same')(x)
    x = BatchNormalization()(x)
    x = ReLU()(x)
    
    x = Conv1D(filters, kernel_size=kernel_size, strides=stride, padding='same')(x)
    x = BatchNormalization()(x)
    
    # Shortcut Connection 추가
    x = Add()([x, shortcut])
    x = ReLU()(x)
    
    return x


input_layer = Input(shape=(30,258))

x = Conv1D(64, kernel_size=3, activation='relu')(input_layer)
x = BatchNormalization()(x)
x = ReLU()(x)
x = MaxPooling1D(pool_size=2)(x)

# --------------------------------------------------------
x = residual_block(x, filters=64)
x = MaxPooling1D(pool_size=2, padding='same')(x)

x = residual_block(x, filters=64)
x = MaxPooling1D(pool_size=2, padding='same')(x)

x = residual_block(x, filters=64)
x = MaxPooling1D(pool_size=2)(x)

# --------------------------------------------------------

x = Bidirectional(LSTM(64, return_sequences=True, activation='tanh'))(x)
x = Dropout(0.3)(x)
x = Bidirectional(LSTM(128, return_sequences=True, activation='tanh'))(x)
x = Dropout(0.3)(x)
x = Bidirectional(LSTM(64, return_sequences=True, activation='tanh'))(x)
x = Dropout(0.3)(x)
x = Bidirectional(LSTM(64, return_sequences=True, activation='tanh'))(x)
x = Dropout(0.3)(x)
x = LSTM(64, return_sequences=False, activation='relu')(x)


output_layer = Dense(actions.shape[0], activation='softmax')(x)
model = Model(inputs=input_layer, outputs=output_layer)

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['categorical_accuracy'])

model.summary()

Model: "model_3"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_4 (InputLayer)           [(None, 30, 258)]    0           []                               
                                                                                                  
 conv1d_30 (Conv1D)             (None, 28, 64)       49600       ['input_4[0][0]']                
                                                                                                  
 batch_normalization_30 (BatchN  (None, 28, 64)      256         ['conv1d_30[0][0]']              
 ormalization)                                                                                    
                                                                                                  
 re_lu_30 (ReLU)                (None, 28, 64)       0           ['batch_normalization_30[0]

 conv1d_39 (Conv1D)             (None, 4, 64)        12352       ['re_lu_38[0][0]']               
                                                                                                  
 batch_normalization_39 (BatchN  (None, 4, 64)       256         ['conv1d_39[0][0]']              
 ormalization)                                                                                    
                                                                                                  
 add_11 (Add)                   (None, 4, 64)        0           ['batch_normalization_39[0][0]', 
                                                                  'max_pooling1d_14[0][0]']       
                                                                                                  
 re_lu_39 (ReLU)                (None, 4, 64)        0           ['add_11[0][0]']                 
                                                                                                  
 max_pooli

In [101]:
model.compile(optimizer='Adam', loss='categorical_crossentropy', metrics=['categorical_accuracy'])

In [102]:
model.fit(X_train, y_train, epochs=200, callbacks=[tb_callback])

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200


Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78/200
Epoch 79/200
Epoch 80/200
Epoch 81/200
Epoch 82/200
Epoch 83/200
Epoch 84/200
Epoch 85/200
Epoch 86/200
Epoch 87/200
Epoch 88/200
Epoch 89/200
Epoch 90/200
Epoch 91/200
Epoch 92/200
Epoch 93/200
Epoch 94/200
Epoch 95/200
Epoch 96/200
Epoch 97/200
Epoch 98/200
Epoch 99/200
Epoch 100/200
Epoch 101/200
Epoch 102/200
Epoch 103/200
Epoch 104/200
Epoch 105/200
Epoch 106/200
Epoch 107/200
Epoch 108/200
Epoch 109/200
Epoch 110/200
Epoch 111/200
Epoch 112/200
Epoch 113/200
Epoch 114/200
Epoch 115/200
Epoch 116/200
Epoch 117/200
Epoch 118/200
Epoch 119/200
Epoch 120/200
Epoch 121/200
Epoch 122/200
Epoch 123/200
Epoch 124/200
Epoch 125/200
Epoch 126/200
Epoch 127/200
Epoch 128/200
Epoch 129/200
Epoch 130/200
Epoch 131/200
Epoch 132/200
Epoch 133/200
Epoch 134/200
Epoch 135/200
Epoch 136/200
Epoch 137/200
Epoch 138/200
Epoch 139/200
Epoch 140/200
Epoch 141/200
Epoch 142/200


Epoch 143/200
Epoch 144/200
Epoch 145/200
Epoch 146/200
Epoch 147/200
Epoch 148/200
Epoch 149/200
Epoch 150/200
Epoch 151/200
Epoch 152/200
Epoch 153/200
Epoch 154/200
Epoch 155/200
Epoch 156/200
Epoch 157/200
Epoch 158/200
Epoch 159/200
Epoch 160/200
Epoch 161/200
Epoch 162/200
Epoch 163/200
Epoch 164/200
Epoch 165/200
Epoch 166/200
Epoch 167/200
Epoch 168/200
Epoch 169/200
Epoch 170/200
Epoch 171/200
Epoch 172/200
Epoch 173/200
Epoch 174/200
Epoch 175/200
Epoch 176/200
Epoch 177/200
Epoch 178/200
Epoch 179/200
Epoch 180/200
Epoch 181/200
Epoch 182/200
Epoch 183/200
Epoch 184/200
Epoch 185/200
Epoch 186/200
Epoch 187/200
Epoch 188/200
Epoch 189/200
Epoch 190/200
Epoch 191/200
Epoch 192/200
Epoch 193/200
Epoch 194/200
Epoch 195/200
Epoch 196/200
Epoch 197/200
Epoch 198/200
Epoch 199/200
Epoch 200/200


<keras.callbacks.History at 0x2043a435780>

In [104]:
model.summary()

Model: "sequential_8"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv1d_2 (Conv1D)           (None, 88, 64)            24256     
                                                                 
 batch_normalization_2 (Batc  (None, 88, 64)           256       
 hNormalization)                                                 
                                                                 
 re_lu_2 (ReLU)              (None, 88, 64)            0         
                                                                 
 max_pooling1d_2 (MaxPooling  (None, 44, 64)           0         
 1D)                                                             
                                                                 
 lstm_29 (LSTM)              (None, 44, 512)           1181696   
                                                                 
 dropout_28 (Dropout)        (None, 44, 512)          

# 8. Make Predictions

In [35]:
predict_res = model.predict(X_test)



# 9. Save Weights

In [84]:
#단일 file/folder 에 모델의 아키텍처, 가중치 및 훈련 구성을 저장
# pose + hands : model_44_258.h5
# hands : model_44_126.h5
# pose 일부 + hands + face 일부 : model_44.220.h5
# pose + hands + face 일부 : model_44_312.h5
model.save('model_44_258(3).h5')

In [103]:
model.load_weights('model_44_258.h5')

# 10. Evaluation using Confusion Matrix and Accuracy

In [104]:
from sklearn.metrics import multilabel_confusion_matrix, accuracy_score

In [105]:
yhat = model.predict(X_test)



In [106]:
#y_test의 값들이 가리키는 label들을 하나의 리스트로 변환
#ex. [[0, 0, 1], [0, 1, 0]] -> [2, 1]
ytrue = np.argmax(y_test, axis=1).tolist()
#yhat의 값들이 가리키는 label들을 하나의 리스트로 변환
#ex. [[0.1, 0.2, 0.7], [0.1, 0.8, 0.1]] -> [2, 1]
yhat = np.argmax(yhat, axis=1).tolist()

In [107]:
#returns a confusion matrix sorted by the label order
multilabel_confusion_matrix(ytrue, yhat)

array([[[427,   0],
        [  0,  13]],

       [[433,   0],
        [  0,   7]],

       [[430,   0],
        [  0,  10]],

       [[429,   0],
        [  1,  10]],

       [[431,   0],
        [  0,   9]],

       [[431,   0],
        [  0,   9]],

       [[428,   1],
        [  0,  11]],

       [[431,   0],
        [  0,   9]],

       [[426,   0],
        [  0,  14]],

       [[430,   0],
        [  1,   9]],

       [[427,   0],
        [  0,  13]],

       [[432,   0],
        [  0,   8]],

       [[429,   0],
        [  0,  11]],

       [[431,   0],
        [  0,   9]],

       [[432,   0],
        [  0,   8]],

       [[432,   0],
        [  0,   8]],

       [[432,   0],
        [  0,   8]],

       [[428,   0],
        [  0,  12]],

       [[431,   0],
        [  0,   9]],

       [[429,   0],
        [  0,  11]],

       [[431,   0],
        [  0,   9]],

       [[425,   0],
        [  6,   9]],

       [[429,   0],
        [  1,  10]],

       [[427,   0],
        [  0, 

In [108]:
accuracy_score(ytrue, yhat)

0.9090909090909091

# 11-1. Test using video

In [109]:
#한글 텍스트 출력
from PIL import ImageFont, ImageDraw, Image

def putKoreanText(src, text, pos, font_size, font_color):
    img_pil = Image.fromarray(src)
    draw = ImageDraw.Draw(img_pil)
    font = ImageFont.truetype('C:/Users/User/ActionDetectionforSignLanguage/fonts/gulim.ttc', font_size)
    draw.text(pos, text, font=font, fill= font_color)
    return np.array(img_pil)

In [51]:
# prediction using video
def video_prediction(video):
    
    # 1. detection variables
    sequence = [] #collect 60 frames to make a sequence(=video)
    sentence = [] #concatenate history of predictions together
    threshold = 0.995

    cap = cv2.VideoCapture(video)
    
    #clear subtitles when {clear_cycle} frames passed without new subtitle added
    frames_without_new_subtitle = 0
    clear_cycle = 300
    
    word = ''
    cnt = 0
    
    # Set mediapipe model 
    with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
        while cap.isOpened():
            # Read video
            ret, frame = cap.read()
            if ret == False:
                break
                
            # Make detections
            image, results = mediapipe_detection(frame, holistic)
            print(results)
            # Draw landmarks
            draw_styled_landmarks(image, results)

            # 2. Prediction logic
            keypoints = extract_keypoints(results)
            sequence.append(keypoints)
            sequence = sequence[-60:] #generate sequence with last 30 frames
            
            #frames_without_new_subtitle += 1

            if len(sequence) == 60:
                #sequence.shape = (60, 1662)
                #the input shape model expects = (number of sequences, 60, 1662)
                res = model.predict(np.expand_dims(sequence, axis=0))[0]
                print(actions[np.argmax(res)])
                
                if word == actions[np.argmax(res)]:
                    cnt = cnt + 1
                else :
                    word = actions[np.argmax(res)]
                    cnt = 0
                    

                #3. Rendering logic
                #ex. res = [0.1, 0.2, 0.7]
                #np.argmax(res) = 2, res[np.argmax(res)] = 0.7
                if res[np.argmax(res)] > threshold and cnt >= 30: 
                    #new subtitle added
                    #frames_without_new_subtitle = 0

                    cur_action_korean = actions[np.argmax(res)]

                    if len(sentence) > 0: 
                        #sentence에 저장된 prediction이 있는 경우 
                        #새로운 prediction인 경우에만 sentence에 추가
                        if cur_action_korean != sentence[-1]:
                            sentence.append(cur_action_korean)
                    else: 
                        #sentence에 저장된 prediction 없는 경우 바로 sentence에 추가
                        sentence.append(cur_action_korean)

                #sentence가 너무 길어지지 않도록 마지막 5개의 prediction만 유지
                if len(sentence) > 5: 
                    sentence = sentence[-5:]
                
                #Clear subtitles if needed
#                if frames_without_new_subtitle >= clear_cycle:
#                    sentence.clear()
                
                #Render subtitles
                cv2.rectangle(image, (0,0), (640, 40), (0, 0, 0), -1) 
#                if target == 'ko':
#                    #putKoreanText(src, text, pos, font_size, font_color
#                    image = putKoreanText(image, ' '.join(sentence), (3,10), 20, (255, 255, 255))
#                else:
#                    cv2.putText(image, ' '.join(sentence), (3,30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255,255,255), 2, cv2.LINE_AA)
                image = putKoreanText(image, ' '.join(sentence), (3,10),(20),(255,255,255))
                # Show to screen
                cv2.imshow('Video Prediction', image)

            # Break gracefully
            if cv2.waitKey(10) & 0xFF == ord('q'):
                break
                
    cap.release()
    cv2.destroyAllWindows()

In [110]:
# prediction using video
def custom_video_prediction(video):
    
    # 1. detection variables
    sequence = [] #collect 60 frames to make a sequence(=video)
    sentence = [] #concatenate history of predictions together
    threshold = 0.995

    cap = cv2.VideoCapture(video)
    
    #clear subtitles when {clear_cycle} frames passed without new subtitle added
    frames_without_new_subtitle = 0
    clear_cycle = 300
    
    word = ''
    cnt = 0
    frame_num = 0
    
    # Set mediapipe model 
    with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
        while cap.isOpened():
            # Read video
            ret, frame = cap.read()
            if ret == False:
                break
                
            frame_num = frame_num+1
            
            if frame_num%2 == 0:
                continue

            
            # Make detections
            image, results = mediapipe_detection(frame, holistic)
            print(results)
            # Draw landmarks
            draw_styled_landmarks(image, results)
            #draw_custom_pose_landmarks(image, results)
            #draw_custom_hand_landmarks(image, results)
            #draw_custom_face_landmarks(image, results)

            # 2. Prediction logic
            keypoints = extract_keypoints_258(results)
            sequence.append(keypoints)
            sequence = sequence[-30:] #generate sequence with last 30 frames
            
            #frames_without_new_subtitle += 1

            if len(sequence) == 30:
                #sequence.shape = (60, 1662)
                #the input shape model expects = (number of sequences, 60, 1662)
                res = model.predict(np.expand_dims(sequence, axis=0))[0]
                print(actions[np.argmax(res)])
                
                if word == actions[np.argmax(res)]:
                    cnt = cnt + 1
                else :
                    word = actions[np.argmax(res)]
                    cnt = 1
                    

                #3. Rendering logic
                #ex. res = [0.1, 0.2, 0.7]
                #np.argmax(res) = 2, res[np.argmax(res)] = 0.7
                if res[np.argmax(res)] > threshold and cnt >= 10: 
                    #new subtitle added
                    #frames_without_new_subtitle = 0

                    cur_action_korean = actions[np.argmax(res)]

                    if len(sentence) > 0: 
                        #sentence에 저장된 prediction이 있는 경우 
                        #새로운 prediction인 경우에만 sentence에 추가
                        if cur_action_korean != sentence[-1]:
                            sentence.append(cur_action_korean)
                    else: 
                        #sentence에 저장된 prediction 없는 경우 바로 sentence에 추가
                        sentence.append(cur_action_korean)

                #sentence가 너무 길어지지 않도록 마지막 5개의 prediction만 유지
                if len(sentence) > 5: 
                    sentence = sentence[-5:]
                
                #Clear subtitles if needed
#                if frames_without_new_subtitle >= clear_cycle:
#                    sentence.clear()
                
                #Render subtitles
                cv2.rectangle(image, (0,0), (640, 40), (0, 0, 0), -1) 
#                if target == 'ko':
#                    #putKoreanText(src, text, pos, font_size, font_color
#                    image = putKoreanText(image, ' '.join(sentence), (3,10), 20, (255, 255, 255))
#                else:
#                    cv2.putText(image, ' '.join(sentence), (3,30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255,255,255), 2, cv2.LINE_AA)
                image = putKoreanText(image, ' '.join(sentence), (3,10),(20),(255,255,255))
                # Show to screen
                cv2.imshow('Video Prediction', image)

            # Break gracefully
            if cv2.waitKey(10) & 0xFF == ord('q'):
                break
                
    cap.release()
    cv2.destroyAllWindows()

In [112]:
curDir = "C:/Users/wjdgu/JupyterMain/ActionDetectionforSignLanguage/videos/손"
for file in os.listdir(curDir):
    if(file.endswith(".mp4")):
        path = os.path.join(curDir, file)
        custom_video_prediction(path)
        

<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.soluti

KeyboardInterrupt: 

# 11-2. Test in Real Time

In [50]:
#한글 텍스트 출력
from PIL import ImageFont, ImageDraw, Image

def putKoreanText(src, text, pos, font_size, font_color):
    img_pil = Image.fromarray(src)
    draw = ImageDraw.Draw(img_pil)
    font = ImageFont.truetype('C:/Users/User/ActionDetectionforSignLanguage/fonts/gulim.ttc', font_size)
    draw.text(pos, text, font=font, fill= font_color)
    return np.array(img_pil)


In [None]:
# 1. detection variables
sequence = [] #collect 60 frames to make a sequence(=video)
sentence = [] #concatenate history of predictions together
threshold = 0.995

word = ''
cnt = 0

cap = cv2.VideoCapture(0)

with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
    while cap.isOpened():

        # Read feed
        ret, frame = cap.read()
        # Make detections
        image, results = mediapipe_detection(frame, holistic)
        print(results)
        # Draw landmarks
        draw_styled_landmarks(image, results)
        
        # 2. Prediction logic
        keypoints = extract_keypoints(results)
        sequence.append(keypoints)
        sequence = sequence[-30:] #generate sequence with last 30 frames
        
        if len(sequence) == 30:
            #sequence.shape = (60, 1662)
            #the input shape model expects = (number of sequences, 60, 1662)
            res = model.predict(np.expand_dims(sequence, axis=0))[0]
            print(actions[np.argmax(res)])

            if word == actions[np.argmax(res)]:
                cnt = cnt + 1
            else :
                word = actions[np.argmax(res)]
                cnt = 1


            #3. Rendering logic
            #ex. res = [0.1, 0.2, 0.7]
            #np.argmax(res) = 2, res[np.argmax(res)] = 0.7
            if res[np.argmax(res)] > threshold and cnt >= 10: 
                #new subtitle added
                #frames_without_new_subtitle = 0

                cur_action_korean = actions[np.argmax(res)]

                if len(sentence) > 0: 
                    #sentence에 저장된 prediction이 있는 경우 
                    #새로운 prediction인 경우에만 sentence에 추가
                    if cur_action_korean != sentence[-1]:
                        sentence.append(cur_action_korean)
                else: 
                    #sentence에 저장된 prediction 없는 경우 바로 sentence에 추가
                    sentence.append(cur_action_korean)

            #sentence가 너무 길어지지 않도록 마지막 5개의 prediction만 유지
            if len(sentence) > 5: 
                sentence = sentence[-5:]

            #Clear subtitles if needed
#                if frames_without_new_subtitle >= clear_cycle:
#                    sentence.clear()

            #Render subtitles
            cv2.rectangle(image, (0,0), (640, 40), (0, 0, 0), -1) 
#                if target == 'ko':
#                    #putKoreanText(src, text, pos, font_size, font_color
#                    image = putKoreanText(image, ' '.join(sentence), (3,10), 20, (255, 255, 255))
#                else:
#                    cv2.putText(image, ' '.join(sentence), (3,30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255,255,255), 2, cv2.LINE_AA)
            image = putKoreanText(image, ' '.join(sentence), (3,10),(20),(255,255,255))
            # Show to screen
            cv2.imshow('OpenCV Feed', image)

        # Break gracefully
        if cv2.waitKey(10) & 0xFF == ord('q'):
            break
            
    cap.release()
    cv2.destroyAllWindows()