In [1]:
import cv2
import numpy as np
import os
from matplotlib import pyplot as plt
import time
import mediapipe as mp
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.callbacks import TensorBoard
import tensorflow as tf

from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
from PIL import ImageFont, ImageDraw, Image

seed=124
np.random.seed(seed)
tf.random.set_seed(seed)

font = ImageFont.truetype('fonts/gulim.ttc', 30)
colors = [(245,117,16), (117,245,16), (16,117,245), (255,227,79), (254,218,249), (0,102,51), (96,96,96)]

def prob_viz(res, actions, sentence, input_frame, colors):
    output_frame = input_frame.copy()
    
    # show prob
    for num, prob in enumerate(res):
        cv2.rectangle(output_frame, (0,60+num*40), (int(prob*100), 90+num*40), colors[num], -1)
    
    # show text box
    cv2.rectangle(output_frame, (0,0), (640, 40), (245, 117, 16), -1) # 텍스트박스
    
    # show sentence text
    output_frame = Image.fromarray(output_frame)
    draw = ImageDraw.Draw(output_frame)
    draw.text( (3,3) , ' '.join(sentence), font=font, fill= (255,255,255))
    
    # show prob text
    for num, prob in enumerate(res):
        draw.text((0, 60+num*40), actions[num], font=font, fill= (255,255,255))

        
    return np.array(output_frame)
def mediapipe_detection(image, model):
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) # COLOR CONVERSION BGR 2 RGB
    image.flags.writeable = False                  # Image is no longer writeable
    results = model.process(image)                 # Make prediction. Model : holistic, image를 입력해 스켈레톤을 좌표값을 생성.
    image.flags.writeable = True                   # Image is now writeable 
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR) # COLOR COVERSION RGB 2 BGR
    return image, results

def draw_styled_landmarks(image, results):
    # image : frame, results : model.predict(image)
    # Draw face connections
    mp_drawing.draw_landmarks(image, results.face_landmarks, mp_holistic.FACEMESH_TESSELATION, 
                             mp_drawing.DrawingSpec(color=(80,110,10), thickness=1, circle_radius=1), 
                             mp_drawing.DrawingSpec(color=(80,256,121), thickness=1, circle_radius=1)
                             ) 
    # Draw pose connections
    mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS,
                             mp_drawing.DrawingSpec(color=(80,22,10), thickness=2, circle_radius=4), 
                             mp_drawing.DrawingSpec(color=(80,44,121), thickness=2, circle_radius=2)
                             ) 
    # Draw left hand connections
    mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS, 
                             mp_drawing.DrawingSpec(color=(121,22,76), thickness=2, circle_radius=4), 
                             mp_drawing.DrawingSpec(color=(121,44,250), thickness=2, circle_radius=2)
                             ) 
    # Draw right hand connections  
    mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS, 
                             mp_drawing.DrawingSpec(color=(245,117,66), thickness=2, circle_radius=4), 
                             mp_drawing.DrawingSpec(color=(245,66,230), thickness=2, circle_radius=2)
                             ) 
    
    # 함수화 : 각 좌표값을 한 ndarray로 concat.
def extract_keypoints(results):
    pose = np.array([[res.x, res.y, res.z, res.visibility] for res in results.pose_landmarks.landmark]).flatten() if results.pose_landmarks else np.zeros(33*4)
    face = np.array([[res.x, res.y, res.z] for res in results.face_landmarks.landmark]).flatten() if results.face_landmarks else np.zeros(468*3)
    lh = np.array([[res.x, res.y, res.z] for res in results.left_hand_landmarks.landmark]).flatten() if results.left_hand_landmarks else np.zeros(21*3)
    rh = np.array([[res.x, res.y, res.z] for res in results.right_hand_landmarks.landmark]).flatten() if results.right_hand_landmarks else np.zeros(21*3)
    return np.concatenate([pose, face, lh, rh])


mp_holistic = mp.solutions.holistic # Holistic model
mp_drawing = mp.solutions.drawing_utils # Drawing utilities

# 0. Preprocess Data and Create Labels and Features

In [135]:
# Path
DATA_PATH = os.path.join('./Data/keypoint/20fps_6') 

# Thirty videos worth of data
no_sequences = 15

# Videos are going to be 30 frames in length
sequence_length = 20

actions = np.array(['나', '목', '아프다', '병원', '어디', '너', '사랑'])
label_map = {label:num for num, label in enumerate(actions)}

sequences, labels = [], []
for action in actions:
    for sequence in range(no_sequences):
        window = []
        for frame_num in range(sequence_length):
            res = np.load(os.path.join(DATA_PATH, action, str(sequence), "{}.npy".format(frame_num)))
            window.append(res)
        sequences.append(window)
        labels.append(label_map[action])
        
X = np.array(sequences)
y = to_categorical(labels).astype(int)

print(f'X shape : {X.shape}')
print(f'y shape : {y.shape}')

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, stratify=y)
print(f'Train shape : {X_train.shape, y_train.shape}')
print(f'Test shape : {X_test.shape, y_test.shape}')

X shape : (105, 20, 1662)
y shape : (105, 7)
Train shape : ((94, 20, 1662), (94, 7))
Test shape : ((11, 20, 1662), (11, 7))


# 1. Build and Train LSTM Neural Network

In [152]:
del model

In [153]:
name= '20fps_act7_11'
log_dir = os.path.join(f'./logs/{name}')
tb_callback = TensorBoard(log_dir=log_dir)

In [155]:
model = Sequential()
model.add(LSTM(64, return_sequences=True, activation='relu', input_shape=(20,1662)))
model.add(LSTM(128, return_sequences=True, activation='relu'))
model.add(LSTM(64, return_sequences=True, activation='relu'))
model.add(Dense(64, activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(actions.shape[0], activation='softmax'))

model.compile(optimizer='Adam', loss='categorical_crossentropy', metrics=['categorical_accuracy'])

In [158]:
model.fit(X_train, y_train, epochs=50, callbacks=[tb_callback])

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x17dace269a0>

In [159]:
model_path = 'weights'
model_name = f'{name}.h5'
model.save(os.path.join(model_path, model_name))

# 2. Evaluation

In [6]:
from sklearn.metrics import multilabel_confusion_matrix, accuracy_score, confusion_matrix, classification_report

yhat = model.predict(X_test)
yhat = np.argmax(yhat, axis=1).tolist()

ytrue = np.argmax(y_test, axis=1).tolist()

print(confusion_matrix(ytrue, yhat))
print(classification_report(ytrue, yhat))

[[2 0 0 0 0 0 0]
 [0 1 0 0 0 0 0]
 [0 0 2 0 0 0 0]
 [0 0 0 2 0 0 0]
 [0 0 0 0 1 0 0]
 [0 1 0 0 0 1 0]
 [1 0 0 0 0 0 0]]
              precision    recall  f1-score   support

           0       0.67      1.00      0.80         2
           1       0.50      1.00      0.67         1
           2       1.00      1.00      1.00         2
           3       1.00      1.00      1.00         2
           4       1.00      1.00      1.00         1
           5       1.00      0.50      0.67         2
           6       0.00      0.00      0.00         1

    accuracy                           0.82        11
   macro avg       0.74      0.79      0.73        11
weighted avg       0.80      0.82      0.78        11



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
