# 1. Import and Install Dependencies

In [1]:
!pip install opencv-python mediapipe scikit-learn matplotlib



In [1]:
import cv2
import numpy as np
import os
from matplotlib import pyplot as plt
import time
import mediapipe as mp

# 2. Keypoints using MP Holistic

In [2]:
mp_holistic = mp.solutions.holistic # Holistic model
mp_drawing = mp.solutions.drawing_utils # Drawing utilities

In [3]:
def mediapipe_detection(image, model): 
    #image = feed frame
    #model = Holistic model
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) # COLOR CONVERSION BGR -> RGB
    image.flags.writeable = False                  # Image is no longer writeable
    results = model.process(image)                 # Make prediction
    image.flags.writeable = True                   # Image is now writeable 
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR) # COLOR COVERSION RGB -> BGR
    return image, results

In [4]:
#connection_drawing_spec 커스텀 (optional)
def draw_styled_landmarks(image, results):
    #Draw pose connections
    mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS,
                             mp_drawing.DrawingSpec(color=(80,22,10), thickness=2, circle_radius=4), 
                             mp_drawing.DrawingSpec(color=(80,44,121), thickness=2, circle_radius=2)
                             ) 
    # Draw left hand connections
    mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS, 
                             mp_drawing.DrawingSpec(color=(121,22,76), thickness=2, circle_radius=4), 
                             mp_drawing.DrawingSpec(color=(121,44,250), thickness=2, circle_radius=2)
                             ) 
    # Draw right hand connections  
    mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS, 
                             mp_drawing.DrawingSpec(color=(245,117,66), thickness=2, circle_radius=4), 
                             mp_drawing.DrawingSpec(color=(245,66,230), thickness=2, circle_radius=2)
                             ) 

In [5]:
#------------------------------------custom function 실행------------------------------------

In [6]:
def draw_custom_pose_landmarks(image, results):
    
    pose_landmark_subset = landmark_pb2.NormalizedLandmarkList(
      landmark = [
          #eye keypoints
          results.pose_landmarks.landmark[mp_pose.PoseLandmark.LEFT_EYE],#0 
          results.pose_landmarks.landmark[mp_pose.PoseLandmark.RIGHT_EYE],#1
          #right arm keypoints
          results.pose_landmarks.landmark[mp_pose.PoseLandmark.RIGHT_WRIST],#2
          results.pose_landmarks.landmark[mp_pose.PoseLandmark.RIGHT_ELBOW],#3
          results.pose_landmarks.landmark[mp_pose.PoseLandmark.RIGHT_SHOULDER],#4 
          #left arm keypoints
          results.pose_landmarks.landmark[mp_pose.PoseLandmark.LEFT_WRIST],#5 
          results.pose_landmarks.landmark[mp_pose.PoseLandmark.LEFT_ELBOW],#6
          results.pose_landmarks.landmark[mp_pose.PoseLandmark.LEFT_SHOULDER],#7
          #body keypoints
          results.pose_landmarks.landmark[mp_pose.PoseLandmark.LEFT_HIP],#8
          results.pose_landmarks.landmark[mp_pose.PoseLandmark.RIGHT_HIP]])#9
    
    #connection = [pose_landmark_subset index1, pose_landmark_subset index2]
    pose_landmark_subset_connections = [
        [2, 3], [3, 4], #right arm
        [5, 6], [6, 7], #left arm
        [4, 7], [4, 9], [8, 9], [7, 8]] #body

    #Draw pose connections
    mp_drawing.draw_landmarks(image, 
                             landmark_list = pose_landmark_subset, 
                             connections = pose_landmark_subset_connections,
                             landmark_drawing_spec = mp_drawing.DrawingSpec(color=(98,129,205), thickness=2, circle_radius=2), 
                             connection_drawing_spec = mp_drawing.DrawingSpec(color=(122,160,255), thickness=2, circle_radius=2)
                             ) 

In [7]:
def draw_custom_hand_landmarks(image, results):
    # Draw left hand connections
    mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS, 
                             mp_drawing.DrawingSpec(color=(149,140,205), thickness=2, circle_radius=2), 
                             mp_drawing.DrawingSpec(color=(185,174,255), thickness=2, circle_radius=2)
                             ) 
    # Draw right hand connections  
    mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS, 
                             mp_drawing.DrawingSpec(color=(205,182,141), thickness=2, circle_radius=2), 
                             mp_drawing.DrawingSpec(color=(255,226,176), thickness=2, circle_radius=2)
                             ) 

In [8]:
def draw_custom_face_landmarks(image, results):
    #face landmark subset
    face_landmark_subset = landmark_pb2.NormalizedLandmarkList(
          landmark = [
              #right eyebrow keypoints
              results.face_landmarks.landmark[46],#0 
              results.face_landmarks.landmark[53],#1
              results.face_landmarks.landmark[52],#2
              results.face_landmarks.landmark[65],#3
              results.face_landmarks.landmark[55],#4

              #left eyebrow keypoints
              results.face_landmarks.landmark[285],#5 
              results.face_landmarks.landmark[295],#6
              results.face_landmarks.landmark[282],#7
              results.face_landmarks.landmark[283],#8
              results.face_landmarks.landmark[276],#9

              #lip keypoints
              results.face_landmarks.landmark[61],#10 
              results.face_landmarks.landmark[81],#11
              results.face_landmarks.landmark[13],#12 
              results.face_landmarks.landmark[311],#13
              results.face_landmarks.landmark[291],#14
              results.face_landmarks.landmark[402],#15 
              results.face_landmarks.landmark[14],#16
              results.face_landmarks.landmark[178]])#17
    
    #connection = {face_landmark_subset index1, face_landmark_subset index2}
    face_landmark_subset_connections = [
        [0, 1], [1, 2], [2, 3], [3, 4], #right eybrow
        [5, 6], [6, 7], [7, 8], [8, 9], #left eyebrow
        [10, 11],[11, 12], [12, 13], [13, 14], [14, 15], [15, 16], [16, 17], [17, 10]] #lip
    
    mp_drawing.draw_landmarks(image, 
                             face_landmark_subset, 
                             face_landmark_subset_connections,
                             mp_drawing.DrawingSpec(color=(112,190,205), thickness=2, circle_radius=4), 
                             mp_drawing.DrawingSpec(color=(139,236,255), thickness=2, circle_radius=2)
                             ) 

# 3. Extract Keypoint Values
results의 landmarks값 -> numpy array로 변환하기

In [9]:
poseIndex = np.array([22, 16, 14, 12, 24, 21, 15, 13, 11, 23])
faceIndex = np.array([61, 291, 81, 13, 311, 178, 14, 402, 285, 295, 282, 283, 276, 46, 53, 52, 65, 55])

def extract_keypoints_220(results):
    # result의 landmarks의 모든 key point values -> 하나의 numpy array 로 flatten
    #if landmarks has no value, fill numpy array with zero
    
    if results.pose_landmarks: #pose landmarks
        #pose = np.array([[res.x, res.y, res.z, res.visibility] for res in results.pose_landmarks.landmark]).flatten()
        i = 0
        pose = np.array([])
        for res in results.pose_landmarks.landmark:
            if i in poseIndex:
                pose = np.append(pose, np.array([res.x, res.y, res.z, res.visibility]))
            i = i + 1
    else:
        pose = np.zeros(132) #33*4

    if results.left_hand_landmarks: #left hand landmarks
        lh = np.array([[res.x, res.y, res.z] for res in results.left_hand_landmarks.landmark]).flatten() 
    else:
        lh = np.zeros(63) #21*3

    if results.right_hand_landmarks: #right hand landmarks
        rh = np.array([[res.x, res.y, res.z] for res in results.right_hand_landmarks.landmark]).flatten() 
    else:
        rh = np.zeros(63) #21*3
        
    #face landmarks
    if results.face_landmarks:
        #face = np.array([[res.x, res.y, res.z] for res in results.face_landmarks.landmark]).flatten()
        i = 0
        face = np.array([])
        for res in results.face_landmarks.landmark:
            if i in faceIndex:
                face = np.append(face, np.array([res.x, res.y, res.z]))
            i = i + 1
    else:
        face = np.zeros(54) #18*3
    
    
    #return np.concatenate([pose, lh, rh])
    return np.concatenate([pose, lh, rh, face])

In [10]:
def extract_keypoints_258(results):
    # result의 landmarks의 모든 key point values -> 하나의 numpy array 로 flatten
    #if landmarks has no value, fill numpy array with zero
    
    if results.pose_landmarks: #pose landmarks
        pose = np.array([[res.x, res.y, res.z, res.visibility] for res in results.pose_landmarks.landmark]).flatten() 
    else:
        pose = np.zeros(132) #33*4

    if results.left_hand_landmarks: #left hand landmarks
        lh = np.array([[res.x, res.y, res.z] for res in results.left_hand_landmarks.landmark]).flatten() 
    else:
        lh = np.zeros(63) #21*3

    if results.right_hand_landmarks: #right hand landmarks
        rh = np.array([[res.x, res.y, res.z] for res in results.right_hand_landmarks.landmark]).flatten() 
    else:
        rh = np.zeros(63) #21*3
    
    
    #return np.concatenate([pose, lh, rh])
    return np.concatenate([pose, lh, rh])

In [11]:
def extract_keypoints_1662(results):
    # result의 landmarks의 모든 key point values -> 하나의 numpy array 로 flatten
    #if landmarks has no value, fill numpy array with zero
    
    if results.pose_landmarks: #pose landmarks
        pose = np.array([[res.x, res.y, res.z, res.visibility] for res in results.pose_landmarks.landmark]).flatten() 
    else:
        pose = np.zeros(132) #33*4

    if results.left_hand_landmarks: #left hand landmarks
        lh = np.array([[res.x, res.y, res.z] for res in results.left_hand_landmarks.landmark]).flatten() 
    else:
        lh = np.zeros(63) #21*3

    if results.right_hand_landmarks: #right hand landmarks
        rh = np.array([[res.x, res.y, res.z] for res in results.right_hand_landmarks.landmark]).flatten() 
    else:
        rh = np.zeros(63) #21*3
    
    #face landmarks
    if results.face_landmarks:
        face = np.array([[res.x, res.y, res.z] for res in results.face_landmarks.landmark]).flatten() 
    else:
        face = np.zeros(1404) #468*3
    
    #return np.concatenate([pose, lh, rh])
    return np.concatenate([pose, lh, rh, face])

# 4. Setup Folders for Collection

In [13]:
#pandas
!pip install xlrd
!pip install openpyxl
!pip install pandas
import pandas as pd



In [12]:
import pandas as pd

In [14]:
# 파일명
file_name = '45.xlsx'

# Daraframe형식으로 엑셀 파일 읽기
df = pd.read_excel(file_name, sheet_name='Sheet1')

list_res = df.loc[:,'단어'].to_list()
print(list_res)

['가슴', '개', '귀', '내일', '누나', '다리', '동생', '뒤', '딸', '머리', '목', '물', '발', '배', '불', '뼈', '선생님', '손', '아기', '아내', '아들', '아빠', '앞', '어제', '엄마', '옆쪽', '오늘', '오른쪽', '왼쪽', '위', '친구', '코', '팔', '학교', 1, 2, 3, 4, 5, 6, 7, 8, 9, 0]


In [13]:
# Path for exported data, numpy arrays
#DATA_PATH = os.path.join('datas') 
DATA_PATH = os.path.join('ssafy_datas')

#num of videos
no_sequences = 60

#actions = np.array(['가스', '가슴', '갇히다'])
#actions = np.array(list_res)
actions = np.array(['손흥민', '고양이', '볼하트', '최고', '걸어다닙니다'])

# 1 Video = 50 frames
sequence_length = 30

In [14]:
len(actions)

5

# 5. Collect Keypoint Values for Training and Testing

In [15]:
##extract keypoints from video
def frame_capture(action, sequence, video):
    cap = cv2.VideoCapture(video)
    
    # Set mediapipe model 
    with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
        frame_num = 0
        while cap.isOpened():
            # Read feed
            ret, frame = cap.read()
            if ret == False:
                break
                
            # Make detections
            image, results = mediapipe_detection(frame, holistic)  
            # Draw landmarks
            #draw_styled_landmarks(image, results)
            draw_custom_pose_landmarks(image, results)
            draw_custom_hand_landmarks(image, results)
            draw_custom_face_landmarks(image, results)

            # Show to screen
            cv2.imshow('OpenCV Video', image)

            # export keypoints
            keypoints = extract_keypoints(results) #results of frame -> numpy array
            npy_path = os.path.join(DATA_PATH, action, str(sequence), str(frame_num))
            np.save(npy_path, keypoints) #save numpy array in directory
            
            #increase frame_num
            frame_num = frame_num + 1
                
            # Break gracefully
            if cv2.waitKey(10) & 0xFF == ord('q'):
                break
                
        cap.release()
        cv2.destroyAllWindows()

# 6. Preprocess Data and Create Labels and Features

In [16]:
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical

In [17]:
label_map = {label:num for num, label in enumerate(actions)}

In [18]:
def GetKeypoint(src):
    poseIndex = np.array([22, 16, 14, 12, 24, 21, 15, 13, 11, 23])
    faceIndex = np.array([61, 291, 81, 13, 311, 178, 14, 402, 285, 295, 282, 283, 276, 46, 53, 52, 65, 55])
    result = np.array([])
    #for p in poseIndex:
    #    for i in range(0,4):
    #        result = np.append(result, src[4*p + i])
    #        
    #result = np.append(result, src[133:133+126])
    result = np.append(result, src[0:258])
    
    for f in faceIndex:
        for i in range(0,3):
            result = np.append(result, src[258 + 3*f + i])
    
    return result

In [24]:
# ------------------------양손, 양손 + 포즈, 양손 + 포즈 + 얼굴 ------------------------

import fnmatch #폴더 안에 있는 npy 개수 계산하기 위함

#create two blank arrays
sequences, labels = [], [] 
#sequences(=videos) = feature data = x data 
#labels = label data = y data

for action in actions: 
    for sequence in range(no_sequences): #each action has 60 sequences(=videos)
        window = []
        
        #폴더 안에 있는 npy 개수(= frame 수) 계산
        file_count = len(fnmatch.filter(os.listdir(os.path.join(DATA_PATH, action, str(sequence))), '*.npy'))
        
        #저장된 npy 수가 원하는 sequence_length보다 같거나 많을 경우
        if(file_count >= sequence_length):  
            #앞에 있는 npy 버리고, start부터 끝까지 sequence_length개의 npy 가져오기
            start = (file_count - sequence_length)//2
            for frame_num in range(sequence_length):
                res = np.load(os.path.join(DATA_PATH, action, str(sequence), "{}.npy".format(start + frame_num)))
                #res_tmp = res[132:258]     # 양손만
                #res_tmp = GetKeypoint(res)  # 포즈 + 손 + 일부 얼굴
                res_tmp = res[0:258]    # 포즈 + 양손

                window.append(res_tmp)
        #저장된 npy 수가 원하는 sequence_length보다 적을 경우
        else:
            #앞에 0으로 채운 npy 추가하기
            for frame_num in range(sequence_length - file_count):
                res = np.zeros(258) #132 + 63 + 63
                window.append(res)
            for frame_num in range(file_count):
                res = np.load(os.path.join(DATA_PATH, action, str(sequence), "{}.npy".format(frame_num)))
                window.append(res)
                              
        #map label
        sequences.append(window)
        labels.append(label_map[action])

In [25]:
X = np.array(sequences)

In [26]:
X.shape

(300, 30, 258)

In [27]:
y = to_categorical(labels).astype(int) 
#converted label from int to binary array
#1 -> [1,0,0], 2 -> [0,1,0], 3 -> [0,0,1]

In [28]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.05)
#split x,y data into train data and test data
#train data 95%, test data size 5% 

# 7. Build and Train LSTM Neural Network

In [29]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.callbacks import TensorBoard 

In [30]:
log_dir = os.path.join('Logs')
tb_callback = TensorBoard(log_dir=log_dir)
#tensor board is used to monitor neural network training and it's accuracy

In [34]:
# 작동 O, model_trash6.h5
import tensorflow as tf
from tensorflow.keras.layers import Add, Input, Embedding, GlobalAveragePooling1D, Conv1D, ReLU, MaxPooling1D, LSTM, Dense, Bidirectional, Dropout, BatchNormalization
from tensorflow.keras.models import Sequential, Model

#Residual Block 구현
def residual_block(x, filters, kernel_size=3, stride=1):
    # Shortcut Connection
    shortcut = x
    
    # Main Path
    x = Conv1D(filters, kernel_size=kernel_size, strides=stride, padding='same')(x)
    x = BatchNormalization()(x)
    x = ReLU()(x)
    
    x = Conv1D(filters, kernel_size=kernel_size, strides=stride, padding='same')(x)
    x = BatchNormalization()(x)
    x = ReLU()(x)
    
    x = Conv1D(filters, kernel_size=kernel_size, strides=stride, padding='same')(x)
    x = BatchNormalization()(x)
    
    # Shortcut Connection 추가
    x = Add()([x, shortcut])
    x = ReLU()(x)
    
    return x


input_layer = Input(shape=(30,258))

x = Conv1D(64, kernel_size=3, activation='relu')(input_layer)
x = BatchNormalization()(x)
x = ReLU()(x)
x = MaxPooling1D(pool_size=2)(x)

# --------------------------------------------------------
x = residual_block(x, filters=64)
x = MaxPooling1D(pool_size=2, padding='same')(x)

x = residual_block(x, filters=64)
x = MaxPooling1D(pool_size=2, padding='same')(x)

x = residual_block(x, filters=64)
x = MaxPooling1D(pool_size=2)(x)

# --------------------------------------------------------

x = Bidirectional(LSTM(64, return_sequences=True, activation='tanh'))(x)
x = Dropout(0.3)(x)
x = Bidirectional(LSTM(128, return_sequences=True, activation='tanh'))(x)
x = Dropout(0.3)(x)
x = Bidirectional(LSTM(64, return_sequences=True, activation='tanh'))(x)
x = Dropout(0.3)(x)
x = Bidirectional(LSTM(64, return_sequences=True, activation='tanh'))(x)
x = Dropout(0.3)(x)
x = LSTM(64, return_sequences=False, activation='relu')(x)

#output_layer = Dense(64, activation='relu')(x)
#output_layer = Dense(32, activation='relu')(x)
output_layer = Dense(actions.shape[0], activation='softmax')(x)
model = Model(inputs=input_layer, outputs=output_layer)

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['categorical_accuracy'])

model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_2 (InputLayer)        [(None, 30, 258)]            0         []                            
                                                                                                  
 conv1d_10 (Conv1D)          (None, 28, 64)               49600     ['input_2[0][0]']             
                                                                                                  
 batch_normalization_10 (Ba  (None, 28, 64)               256       ['conv1d_10[0][0]']           
 tchNormalization)                                                                                
                                                                                                  
 re_lu_10 (ReLU)             (None, 28, 64)               0         ['batch_normalization_10[0

                                                                                                  
 conv1d_18 (Conv1D)          (None, 4, 64)                12352     ['re_lu_17[0][0]']            
                                                                                                  
 batch_normalization_18 (Ba  (None, 4, 64)                256       ['conv1d_18[0][0]']           
 tchNormalization)                                                                                
                                                                                                  
 re_lu_18 (ReLU)             (None, 4, 64)                0         ['batch_normalization_18[0][0]
                                                                    ']                            
                                                                                                  
 conv1d_19 (Conv1D)          (None, 4, 64)                12352     ['re_lu_18[0][0]']            
          

In [35]:
model.compile(optimizer='Adam', loss='categorical_crossentropy', metrics=['categorical_accuracy'])

In [36]:
model.fit(X_train, y_train, epochs=1000, callbacks=[tb_callback])

Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000
Epoch 46/1000
Epoch 47/1000
Epoch 48/1000
Epoch 49/1000
Epoch 50/1000
Epoch 51/1000
Epoch 52/1000
Epoch 53/1000
Epoch 54/1000
Epoch 55/1000
Epoch 56/1000
Epoch 57/1000
Epoch 58/1000
Epoch 59/1000
Epoch 60/1000
Epoch 61/1000
Epoch 62/1000
Epoch 63/1000
Epoch 64/1000
Epoch 65/1000
Epoch 66/1000
Epoch 67/1000
Epoch 68/1000
Epoch 69/1000
Epoch 70/1000
Epoch 71/1000
Epoch 72/1000
E

Epoch 74/1000
Epoch 75/1000
Epoch 76/1000
Epoch 77/1000
Epoch 78/1000
Epoch 79/1000
Epoch 80/1000
Epoch 81/1000
Epoch 82/1000
Epoch 83/1000
Epoch 84/1000
Epoch 85/1000
Epoch 86/1000
Epoch 87/1000
Epoch 88/1000
Epoch 89/1000
Epoch 90/1000
Epoch 91/1000
Epoch 92/1000
Epoch 93/1000
Epoch 94/1000
Epoch 95/1000
Epoch 96/1000
Epoch 97/1000
Epoch 98/1000
Epoch 99/1000
Epoch 100/1000
Epoch 101/1000
Epoch 102/1000
Epoch 103/1000
Epoch 104/1000
Epoch 105/1000
Epoch 106/1000
Epoch 107/1000
Epoch 108/1000
Epoch 109/1000
Epoch 110/1000
Epoch 111/1000
Epoch 112/1000
Epoch 113/1000
Epoch 114/1000
Epoch 115/1000
Epoch 116/1000
Epoch 117/1000
Epoch 118/1000
Epoch 119/1000
Epoch 120/1000
Epoch 121/1000
Epoch 122/1000
Epoch 123/1000
Epoch 124/1000
Epoch 125/1000
Epoch 126/1000
Epoch 127/1000
Epoch 128/1000
Epoch 129/1000
Epoch 130/1000
Epoch 131/1000
Epoch 132/1000
Epoch 133/1000
Epoch 134/1000
Epoch 135/1000
Epoch 136/1000
Epoch 137/1000
Epoch 138/1000
Epoch 139/1000
Epoch 140/1000
Epoch 141/1000
Epoch 

Epoch 145/1000
Epoch 146/1000
Epoch 147/1000
Epoch 148/1000
Epoch 149/1000
Epoch 150/1000
Epoch 151/1000
Epoch 152/1000
Epoch 153/1000
Epoch 154/1000
Epoch 155/1000
Epoch 156/1000
Epoch 157/1000
Epoch 158/1000
Epoch 159/1000
Epoch 160/1000
Epoch 161/1000
Epoch 162/1000
Epoch 163/1000
Epoch 164/1000
Epoch 165/1000
Epoch 166/1000
Epoch 167/1000
Epoch 168/1000
Epoch 169/1000
Epoch 170/1000
Epoch 171/1000
Epoch 172/1000
Epoch 173/1000
Epoch 174/1000
Epoch 175/1000
Epoch 176/1000
Epoch 177/1000
Epoch 178/1000
Epoch 179/1000
Epoch 180/1000
Epoch 181/1000
Epoch 182/1000
Epoch 183/1000
Epoch 184/1000
Epoch 185/1000
Epoch 186/1000
Epoch 187/1000
Epoch 188/1000
Epoch 189/1000
Epoch 190/1000
Epoch 191/1000
Epoch 192/1000
Epoch 193/1000
Epoch 194/1000
Epoch 195/1000
Epoch 196/1000
Epoch 197/1000
Epoch 198/1000
Epoch 199/1000
Epoch 200/1000
Epoch 201/1000
Epoch 202/1000
Epoch 203/1000
Epoch 204/1000
Epoch 205/1000
Epoch 206/1000
Epoch 207/1000
Epoch 208/1000
Epoch 209/1000
Epoch 210/1000
Epoch 211/

KeyboardInterrupt: 

In [37]:
model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_2 (InputLayer)        [(None, 30, 258)]            0         []                            
                                                                                                  
 conv1d_10 (Conv1D)          (None, 28, 64)               49600     ['input_2[0][0]']             
                                                                                                  
 batch_normalization_10 (Ba  (None, 28, 64)               256       ['conv1d_10[0][0]']           
 tchNormalization)                                                                                
                                                                                                  
 re_lu_10 (ReLU)             (None, 28, 64)               0         ['batch_normalization_10[0

                                                                                                  
 conv1d_18 (Conv1D)          (None, 4, 64)                12352     ['re_lu_17[0][0]']            
                                                                                                  
 batch_normalization_18 (Ba  (None, 4, 64)                256       ['conv1d_18[0][0]']           
 tchNormalization)                                                                                
                                                                                                  
 re_lu_18 (ReLU)             (None, 4, 64)                0         ['batch_normalization_18[0][0]
                                                                    ']                            
                                                                                                  
 conv1d_19 (Conv1D)          (None, 4, 64)                12352     ['re_lu_18[0][0]']            
          

# 8. Make Predictions

In [38]:
predict_res = model.predict(X_test)



# 9. Save Weights

In [39]:
#단일 file/folder 에 모델의 아키텍처, 가중치 및 훈련 구성을 저장
# pose + hands : model_44_258.h5
# hands : model_44_126.h5
# pose 일부 + hands + face 일부 : model_44.220.h5
# pose + hands + face 일부 : model_44_312.h5

#model.save('model_44_220.h5')
model.save('ssafy_model_258.h5')

  saving_api.save_model(


In [40]:
#model.load_weights('model_44_258.h5')
model.load_weights('ssafy_model_258.h5')

# 10. Evaluation using Confusion Matrix and Accuracy

In [41]:
from sklearn.metrics import multilabel_confusion_matrix, accuracy_score

In [42]:
yhat = model.predict(X_test)



In [43]:
#y_test의 값들이 가리키는 label들을 하나의 리스트로 변환
#ex. [[0, 0, 1], [0, 1, 0]] -> [2, 1]
ytrue = np.argmax(y_test, axis=1).tolist()
#yhat의 값들이 가리키는 label들을 하나의 리스트로 변환
#ex. [[0.1, 0.2, 0.7], [0.1, 0.8, 0.1]] -> [2, 1]
yhat = np.argmax(yhat, axis=1).tolist()

In [44]:
#returns a confusion matrix sorted by the label order
multilabel_confusion_matrix(ytrue, yhat)

array([[[11,  0],
        [ 0,  4]],

       [[13,  0],
        [ 0,  2]],

       [[13,  0],
        [ 0,  2]],

       [[12,  0],
        [ 0,  3]],

       [[11,  0],
        [ 0,  4]]])

In [45]:
accuracy_score(ytrue, yhat)

1.0

# 11-1. Test using video

In [46]:
#한글 텍스트 출력
from PIL import ImageFont, ImageDraw, Image

def putKoreanText(src, text, pos, font_size, font_color):
    img_pil = Image.fromarray(src)
    draw = ImageDraw.Draw(img_pil)
    font = ImageFont.truetype('/Users/ohsopp/Desktop/tensorflow/ActionDetectionforSignLanguage/fonts/gulim.ttc', font_size)
    draw.text(pos, text, font=font, fill= font_color)
    return np.array(img_pil)

In [47]:
# prediction using video
def video_prediction(video):
    
    # 1. detection variables
    sequence = [] #collect 60 frames to make a sequence(=video)
    sentence = [] #concatenate history of predictions together
    threshold = 0.995

    cap = cv2.VideoCapture(video)
    
    #clear subtitles when {clear_cycle} frames passed without new subtitle added
    frames_without_new_subtitle = 0
    clear_cycle = 300
    
    word = ''
    cnt = 0
    
    # Set mediapipe model 
    with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
        while cap.isOpened():
            # Read video
            ret, frame = cap.read()
            if ret == False:
                break
                
            # Make detections
            image, results = mediapipe_detection(frame, holistic)
            print(results)
            # Draw landmarks
            draw_styled_landmarks(image, results)

            # 2. Prediction logic
            keypoints = extract_keypoints(results)
            sequence.append(keypoints)
            sequence = sequence[-60:] #generate sequence with last 30 frames
            
            #frames_without_new_subtitle += 1

            if len(sequence) == 60:
                #sequence.shape = (60, 1662)
                #the input shape model expects = (number of sequences, 60, 1662)
                res = model.predict(np.expand_dims(sequence, axis=0))[0]
                print(actions[np.argmax(res)])
                
                if word == actions[np.argmax(res)]:
                    cnt = cnt + 1
                else :
                    word = actions[np.argmax(res)]
                    cnt = 0
                    

                #3. Rendering logic
                #ex. res = [0.1, 0.2, 0.7]
                #np.argmax(res) = 2, res[np.argmax(res)] = 0.7
                if res[np.argmax(res)] > threshold and cnt >= 30: 
                    #new subtitle added
                    #frames_without_new_subtitle = 0

                    cur_action_korean = actions[np.argmax(res)]

                    if len(sentence) > 0: 
                        #sentence에 저장된 prediction이 있는 경우 
                        #새로운 prediction인 경우에만 sentence에 추가
                        if cur_action_korean != sentence[-1]:
                            sentence.append(cur_action_korean)
                    else: 
                        #sentence에 저장된 prediction 없는 경우 바로 sentence에 추가
                        sentence.append(cur_action_korean)

                #sentence가 너무 길어지지 않도록 마지막 5개의 prediction만 유지
                if len(sentence) > 5: 
                    sentence = sentence[-5:]
                
                #Clear subtitles if needed
#                if frames_without_new_subtitle >= clear_cycle:
#                    sentence.clear()
                
                #Render subtitles
                cv2.rectangle(image, (0,0), (640, 40), (0, 0, 0), -1) 
#                if target == 'ko':
#                    #putKoreanText(src, text, pos, font_size, font_color
#                    image = putKoreanText(image, ' '.join(sentence), (3,10), 20, (255, 255, 255))
#                else:
#                    cv2.putText(image, ' '.join(sentence), (3,30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255,255,255), 2, cv2.LINE_AA)
                image = putKoreanText(image, ' '.join(sentence), (3,10),(20),(255,255,255))
                # Show to screen
                cv2.imshow('Video Prediction', image)

            # Break gracefully
            if cv2.waitKey(10) & 0xFF == ord('q'):
                break
                
    cap.release()
    cv2.destroyAllWindows()

In [48]:
# prediction using video
def custom_video_prediction(video):
    
    # 1. detection variables
    sequence = [] #collect 60 frames to make a sequence(=video)
    sentence = [] #concatenate history of predictions together
    threshold = 0.995

    cap = cv2.VideoCapture(video)
    
    #clear subtitles when {clear_cycle} frames passed without new subtitle added
    frames_without_new_subtitle = 0
    clear_cycle = 300
    
    word = ''
    cnt = 0
    frame_num = 0
    
    # Set mediapipe model 
    with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
        while cap.isOpened():
            # Read video
            ret, frame = cap.read()
            if ret == False:
                break
                
            frame_num = frame_num+1
            
            if frame_num%2 == 0:
                continue

            
            # Make detections
            image, results = mediapipe_detection(frame, holistic)
            print(results)
            # Draw landmarks
            draw_styled_landmarks(image, results)
            #draw_custom_pose_landmarks(image, results)
            #draw_custom_hand_landmarks(image, results)
            #draw_custom_face_landmarks(image, results)

            # 2. Prediction logic
            keypoints = extract_keypoints_258(results)
            sequence.append(keypoints)
            sequence = sequence[-30:] #generate sequence with last 30 frames
            
            #frames_without_new_subtitle += 1

            if len(sequence) == 30:
                #sequence.shape = (60, 1662)
                #the input shape model expects = (number of sequences, 60, 1662)
                res = model.predict(np.expand_dims(sequence, axis=0))[0]
                print(actions[np.argmax(res)])
                
                if word == actions[np.argmax(res)]:
                    cnt = cnt + 1
                else :
                    word = actions[np.argmax(res)]
                    cnt = 1
                    

                #3. Rendering logic
                #ex. res = [0.1, 0.2, 0.7]
                #np.argmax(res) = 2, res[np.argmax(res)] = 0.7
                if res[np.argmax(res)] > threshold and cnt >= 10: 
                    #new subtitle added
                    #frames_without_new_subtitle = 0

                    cur_action_korean = actions[np.argmax(res)]

                    if len(sentence) > 0: 
                        #sentence에 저장된 prediction이 있는 경우 
                        #새로운 prediction인 경우에만 sentence에 추가
                        if cur_action_korean != sentence[-1]:
                            sentence.append(cur_action_korean)
                    else: 
                        #sentence에 저장된 prediction 없는 경우 바로 sentence에 추가
                        sentence.append(cur_action_korean)

                #sentence가 너무 길어지지 않도록 마지막 5개의 prediction만 유지
                if len(sentence) > 5: 
                    sentence = sentence[-5:]
                
                #Clear subtitles if needed
#                if frames_without_new_subtitle >= clear_cycle:
#                    sentence.clear()
                
                #Render subtitles
                cv2.rectangle(image, (0,0), (640, 80), (0, 0, 0), -1) 
#                if target == 'ko':
#                    #putKoreanText(src, text, pos, font_size, font_color
#                    image = putKoreanText(image, ' '.join(sentence), (3,10), 20, (255, 255, 255))
#                else:
#                    cv2.putText(image, ' '.join(sentence), (3,30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255,255,255), 2, cv2.LINE_AA)
                image = putKoreanText(image, ' '.join(sentence), (3,10),(50),(255,255,255))
                # Show to screen
                cv2.imshow('Video Prediction', image)

            # Break gracefully
            if cv2.waitKey(10) & 0xFF == ord('q'):
                break
                
    cap.release()
    cv2.destroyAllWindows()

In [43]:
curDir = "/Users/ohsopp/Desktop/tensorflow/ActionDetectionforSignLanguage/졸프시연단어"
for i in range(100):
    for file in os.listdir(curDir):
        if(file.endswith(".mp4")):
            path = os.path.join(curDir, file)
            custom_video_prediction(path)
        

<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.soluti

오른쪽
<class 'mediapipe.python.solution_base.SolutionOutputs'>
오른쪽
<class 'mediapipe.python.solution_base.SolutionOutputs'>
동생
<class 'mediapipe.python.solution_base.SolutionOutputs'>
동생
<class 'mediapipe.python.solution_base.SolutionOutputs'>
동생
<class 'mediapipe.python.solution_base.SolutionOutputs'>
2
<class 'mediapipe.python.solution_base.SolutionOutputs'>
3
<class 'mediapipe.python.solution_base.SolutionOutputs'>
3
<class 'mediapipe.python.solution_base.SolutionOutputs'>
동생
<class 'mediapipe.python.solution_base.SolutionOutputs'>
동생


KeyboardInterrupt: 

# 11-2. Test in Real Time

In [1]:
# 1. detection variables
sequence = [] #collect 60 frames to make a sequence(=video)
sentence = [] #concatenate history of predictions together
threshold = 0.995

word = ''
cnt = 0

cap = cv2.VideoCapture(0)

with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
    while cap.isOpened():

        # Read feed
        ret, frame = cap.read()
        
        frame = cv2.flip(frame, 1)
        # Make detections
        image, results = mediapipe_detection(frame, holistic)
        print(results)
        # Draw landmarks
        draw_styled_landmarks(image, results)
        
        # 2. Prediction logic
        keypoints = extract_keypoints_258(results)
        sequence.append(keypoints)
        sequence = sequence[-30:] #generate sequence with last 30 frames
        
        if len(sequence) == 30:
            #sequence.shape = (60, 1662)
            #the input shape model expects = (number of sequences, 60, 1662)
            res = model.predict(np.expand_dims(sequence, axis=0))[0]
            print(actions[np.argmax(res)])

            if word == actions[np.argmax(res)]:
                cnt = cnt + 1
            else :
                word = actions[np.argmax(res)]
                cnt = 1


            #3. Rendering logic
            #ex. res = [0.1, 0.2, 0.7]
            #np.argmax(res) = 2, res[np.argmax(res)] = 0.7
            if res[np.argmax(res)] > threshold and cnt >= 10: 
                #new subtitle added
                #frames_without_new_subtitle = 0

                cur_action_korean = actions[np.argmax(res)]

                if len(sentence) > 0: 
                    #sentence에 저장된 prediction이 있는 경우 
                    #새로운 prediction인 경우에만 sentence에 추가
                    if cur_action_korean != sentence[-1]:
                        sentence.append(cur_action_korean)
                else: 
                    #sentence에 저장된 prediction 없는 경우 바로 sentence에 추가
                    sentence.append(cur_action_korean)

            #sentence가 너무 길어지지 않도록 마지막 5개의 prediction만 유지
            if len(sentence) > 1: 
                sentence = sentence[-1:]
                #print(sentence)

            #Clear subtitles if needed
#                if frames_without_new_subtitle >= clear_cycle:
#                    sentence.clear()

            #Render subtitles
            cv2.rectangle(image, (0,0), (640, 80), (0, 0, 0), -1) 
#                if target == 'ko':
#                    #putKoreanText(src, text, pos, font_size, font_color
#                    image = putKoreanText(image, ' '.join(sentence), (3,10), 20, (255, 255, 255))
#                else:
#                    cv2.putText(image, ' '.join(sentence), (3,30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255,255,255), 2, cv2.LINE_AA)
            image = putKoreanText(image, ' '.join(sentence), (3,10),(50),(255,255,255))
            # Show to screen
            cv2.imshow('OpenCV Feed', image)

        # Break gracefully
        if cv2.waitKey(10) & 0xFF == ord('q'):
            break
            
    cap.release()
    cv2.destroyAllWindows()

NameError: name 'cv2' is not defined