In [None]:
import cv2
import mediapipe as mp
import numpy as np
import time, os
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.backend import clear_session
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from tensorflow.keras.models import load_model
from PIL import ImageFont, ImageDraw, Image

In [None]:
# Collect training data

# Sign-language words to record. A word's position in this list is the
# integer class label appended to every frame recorded for it.
actions = [
    '알다', '모르다', '좋다', '싫다', '맞다',
    '아니다', '틀리다', '있다', '없다', '만나다',
    '안녕하세요', '즐겁다', '배우다', '봅시다', '수화',
]
seq_length = 30       # number of consecutive frames in one training window
secs_for_action = 30  # seconds of webcam footage recorded per action

# MediaPipe hands model
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
hands = mp_hands.Hands(
    max_num_hands=2,
    min_detection_confidence=0.5,
    min_tracking_confidence=0.5)


def extract_hand_features(hand_landmarks):
    """Convert one detected hand into a flat 99-value feature vector.

    Args:
        hand_landmarks: one entry of ``result.multi_hand_landmarks``.

    Returns:
        1-D array of 99 values: the 21 landmarks as (x, y, z, visibility)
        flattened (84 values), followed by 15 joint angles in degrees.
    """
    joint = np.zeros((21, 4))
    for j, lm in enumerate(hand_landmarks.landmark):
        joint[j] = [lm.x, lm.y, lm.z, lm.visibility]

    # Bone vectors: child joint minus parent joint -> [20, 3]
    v1 = joint[[0,1,2,3,0,5,6,7,0,9,10,11,0,13,14,15,0,17,18,19], :3]  # Parent joint
    v2 = joint[[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20], :3]  # Child joint
    v = v2 - v1

    # Normalize each bone. A zero-length bone is left as a zero vector instead
    # of dividing by zero, which would write NaN into the dataset.
    norm = np.linalg.norm(v, axis=1)[:, np.newaxis]
    v = v / np.where(norm == 0, 1.0, norm)

    # Angle between consecutive bones via arccos of the dot product -> [15,]
    # Rounding can push the dot product slightly outside [-1, 1], where arccos
    # returns NaN, so clip first.
    cos_angle = np.einsum('nt,nt->n',
        v[[0,1,2,4,5,6,8,9,10,12,13,14,16,17,18], :],
        v[[1,2,3,5,6,7,9,10,11,13,14,15,17,18,19], :])
    angle = np.degrees(np.arccos(np.clip(cos_angle, -1.0, 1.0)))  # radian -> degree
    angle_label = np.array(angle, dtype=np.float32)

    return np.concatenate([joint.flatten(), angle_label])


cap = cv2.VideoCapture(0)

created_time = int(time.time())
os.makedirs('dataset', exist_ok=True)

try:
    camera_ok = cap.isOpened()
    if not camera_ok:
        print('Could not open the webcam; nothing was recorded.')

    for idx, action in enumerate(actions):
        if not camera_ok:
            break

        data = []

        # Show a "get ready" frame for 3 seconds before recording starts.
        ret, img = cap.read()
        if not ret:
            print('Failed to read a frame from the webcam; stopping collection.')
            break

        img = cv2.flip(img, 1)

        cv2.putText(img, 'Waiting for collecting...', org=(10, 30), fontFace=cv2.FONT_HERSHEY_SIMPLEX, fontScale=1, color=(255, 255, 255), thickness=2)
        cv2.imshow('img', img)
        cv2.waitKey(3000)

        start_time = time.time()

        while time.time() - start_time < secs_for_action:
            ret, img = cap.read()
            if not ret:
                # Keep what was captured for this action, then stop entirely.
                print('Failed to read a frame from the webcam; stopping collection.')
                camera_ok = False
                break

            img = cv2.flip(img, 1)
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            result = hands.process(img)
            img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)

            if result.multi_hand_landmarks is not None:
                hand_arr = []
                for res in result.multi_hand_landmarks:
                    hand_arr.extend(extract_hand_features(res))
                    mp_drawing.draw_landmarks(img, res, mp_hands.HAND_CONNECTIONS)

                # Only one hand visible: pad the second hand's slot with zeros
                # so every row has the same width.
                if len(hand_arr) == 99:
                    hand_arr.extend(np.zeros(99))

                # Last column is the class label.
                hand_arr = np.append(hand_arr, idx)
                data.append(hand_arr)

            cv2.imshow('img', img)
            # 'q' ends the current action early; frames captured so far are
            # still saved and collection moves on to the next action.
            if cv2.waitKey(1) == ord('q'):
                break

        data = np.array(data)
        print(action, data.shape)
        np.save(os.path.join('dataset', f'raw_{action}_{created_time}'), data)

        # Create sequence data: sliding windows of seq_length frames, stride 1.
        # The +1 keeps the final full window, which the plain difference drops.
        full_seq_data = np.array([
            data[start:start + seq_length]
            for start in range(len(data) - seq_length + 1)
        ])
        if len(full_seq_data) == 0:
            print(f'Warning: fewer than {seq_length} frames with hands were captured; the sequence file is empty.')
        print(action, full_seq_data.shape)
        np.save(os.path.join('dataset', f'seq_{action}_{created_time}'), full_seq_data)
finally:
    # Always free the MediaPipe graph, the windows and the camera, even if
    # the cell is interrupted or raises.
    hands.close()
    cv2.destroyAllWindows()
    cap.release()

In [None]:
# Modeling

# Actions whose recorded sequence files are loaded for training.
# NOTE(review): the label stored in each file is the index the word had in the
# list used while recording, not its position in this shorter list -- confirm
# before mapping predicted indices back to words.
actions = [
    '알다', '모르다', '좋다', '싫다', '맞다',
    '아니다', '틀리다', '있다', '없다', '만나다',
    '배우다', '봅시다', '수화',
]

# Timestamp suffix of the recording session to load.
dataset_timestamp = 1650258083

# One file per action, stacked along the window axis in list order.
data = np.concatenate([
    np.load(f'dataset/seq_{action}_{dataset_timestamp}.npy')
    for action in actions
], axis=0)


# Every column except the last is a feature; the last column is the class
# label, read from the first frame of each window.
x = data[:, :, :-1]
y = data[:, 0, -1]

# One-hot encode. With no explicit num_classes the width is the largest label
# present plus one.
y = to_categorical(y)

# Fixed seed so the split (and therefore the reported accuracy) is repeatable.
# NOTE(review): if the windows were cut with stride 1 from continuous
# recordings, a random per-window split puts near-identical windows on both
# sides, so the test accuracy is optimistic -- consider splitting by recording.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, stratify=y, random_state=42)

# Drop any graph/state left over from a previous run of this cell.
clear_session()

# LSTM over the frame sequence followed by a small dense classifier.
# The input shape (frames per window, features per frame) is taken from the
# training data so it cannot drift out of sync with the dataset.
model = Sequential([
    LSTM(256, input_shape=x_train.shape[1:]),
    Dense(128, activation='relu'),
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    Dense(y.shape[-1], activation='softmax'),
])

model.compile(loss='categorical_crossentropy', metrics=['accuracy'], optimizer='adam')
model.summary()

# Stop once validation loss has not improved for 5 epochs and roll back to
# the best weights seen.
es = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True, verbose=1, min_delta=0)

model.fit(x_train, y_train, validation_split=0.2, epochs=100, callbacks=[es], verbose=1)

# Held-out accuracy; left as the last expression so the notebook displays it.
y_pred = model.predict(x_test)
accuracy_score(np.argmax(y_test, axis=1), np.argmax(y_pred, axis=1))

In [None]:
# test & predict

# Words the classifier can output; a predicted class index is looked up in
# this list to get the word shown on screen.
actions = [
    '알다', '모르다', '좋다', '싫다', '맞다',
    '아니다', '틀리다', '있다', '없다', '만나다',
    '안녕하세요', '즐겁다', '배우다', '봅시다', '수화',
]
seq_length = 30  # number of most recent frames fed to the model

# MediaPipe hands model
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
hands = mp_hands.Hands(
    max_num_hands=2,
    min_detection_confidence=0.5,
    min_tracking_confidence=0.5)


def extract_hand_features(hand_landmarks):
    """Convert one detected hand into a flat 99-value feature vector.

    Defined in this cell so the cell can run on its own.

    Args:
        hand_landmarks: one entry of ``result.multi_hand_landmarks``.

    Returns:
        1-D array of 99 values: the 21 landmarks as (x, y, z, visibility)
        flattened (84 values), followed by 15 joint angles in degrees.
    """
    joint = np.zeros((21, 4))
    for j, lm in enumerate(hand_landmarks.landmark):
        joint[j] = [lm.x, lm.y, lm.z, lm.visibility]

    # Bone vectors: child joint minus parent joint -> [20, 3]
    v1 = joint[[0,1,2,3,0,5,6,7,0,9,10,11,0,13,14,15,0,17,18,19], :3]  # Parent joint
    v2 = joint[[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20], :3]  # Child joint
    v = v2 - v1

    # Normalize each bone; a zero-length bone stays a zero vector instead of
    # producing NaN through a division by zero.
    norm = np.linalg.norm(v, axis=1)[:, np.newaxis]
    v = v / np.where(norm == 0, 1.0, norm)

    # Angle between consecutive bones via arccos of the dot product -> [15,]
    # Clip first: rounding can push the value just outside [-1, 1] (-> NaN).
    cos_angle = np.einsum('nt,nt->n',
        v[[0,1,2,4,5,6,8,9,10,12,13,14,16,17,18], :],
        v[[1,2,3,5,6,7,9,10,11,13,14,15,17,18,19], :])
    angle = np.degrees(np.arccos(np.clip(cos_angle, -1.0, 1.0)))  # radian -> degree

    return np.concatenate([joint.flatten(), angle])


def update_prediction(hand_arr):
    """Feed one frame's features to the classifier and pick the overlay text.

    Uses the `model` already present in the kernel. Appends to `seq` and
    `action_seq`, both trimmed so they do not grow for the whole session.

    Args:
        hand_arr: list of feature values for the current frame.

    Returns:
        The text to draw on the frame, or None when nothing should be drawn:
        not enough frames yet, confidence below 0.9, or fewer than three
        confident predictions so far. The text is the predicted word when the
        last three confident predictions agree, otherwise '?'.
    """
    if len(hand_arr) > 198:
        return None

    seq.append(hand_arr)
    del seq[:-seq_length]  # keep only the frames the model can still use
    if len(seq) < seq_length:
        return None

    input_data = np.expand_dims(np.array(seq, dtype=np.float32), axis=0)
    # verbose=0: avoid printing a progress bar for every single frame.
    y_pred = model.predict(input_data, verbose=0).squeeze()
    i_pred = int(np.argmax(y_pred))
    conf = y_pred[i_pred]
    if conf < 0.9:
        return None

    print(i_pred)
    action = actions[i_pred]
    action_seq.append(action)
    del action_seq[:-3]  # only the last three predictions are ever compared
    if len(action_seq) < 3:
        return None

    if action_seq[-1] == action_seq[-2] == action_seq[-3]:
        return action
    return '?'


# Load the font once up front rather than on every drawn frame; this also
# surfaces a missing font file before the camera is opened.
font = ImageFont.truetype("fonts/gulim.ttc", 20)

cap = cv2.VideoCapture(0)

seq = []
action_seq = []

try:
    while cap.isOpened():
        ret, img = cap.read()
        if not ret:
            print('Failed to read a frame from the webcam; stopping.')
            break

        img = cv2.flip(img, 1)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        result = hands.process(img)
        img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)

        if result.multi_hand_landmarks is not None:
            hand_arr = []
            for res in result.multi_hand_landmarks:
                hand_arr.extend(extract_hand_features(res))
                mp_drawing.draw_landmarks(img, res, mp_hands.HAND_CONNECTIONS)

            # Only one hand visible: zero-fill the second hand's slot.
            if len(hand_arr) == 99:
                hand_arr.extend(np.zeros(99))

            this_action = update_prediction(hand_arr)
            if this_action is not None:
                # OpenCV's built-in fonts cannot draw Hangul, so render the
                # text with PIL. The array is BGR, so (0, 0, 255) is red.
                pil_img = Image.fromarray(img)
                ImageDraw.Draw(pil_img).text((30, 50), this_action, font=font, fill=(0, 0, 255))
                img = np.array(pil_img)

        # Always show the frame and poll the keyboard, even when no label was
        # produced, so the preview stays live and 'q' always quits.
        cv2.imshow('img', img)
        if cv2.waitKey(1) == ord('q'):
            break
finally:
    # Always free the MediaPipe graph, the windows and the camera.
    hands.close()
    cv2.destroyAllWindows()
    cap.release()