In [None]:
import cv2
import mediapipe as mp
import numpy as np

max_num_hands = 2  # track up to two hands per frame

# Class id predicted by the KNN model -> hand-pose name.
# The trailing notes say which signed word each pose belongs to.
gesture = {
    0: 'Rfist1',  # "cold": right hand
    1: 'Lfist1',  # "cold": left hand
    2: 'nine',    # "thank you": right hand
    3: '3',       # "thank you": left hand
    4: 'ok',      # "sorry": right hand, first pose
    5: '5',       # "sorry": right hand, second pose
    # NOTE(review): the original notes label both "hello" poses as
    # "right hand 1/2", but the names suggest a right/left pair - confirm.
    6: 'Rfist2',  # "hello": pose 1
    7: 'Lfist2',  # "hello": pose 2
}

# --- MediaPipe hand-landmark tracker ---------------------------------------
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
hands = mp_hands.Hands(
    max_num_hands=max_num_hands,
    min_detection_confidence=0.5,
    min_tracking_confidence=0.5,
)

# --- Gesture classifier ------------------------------------------------------
# word.csv holds the training samples, one per row: every column except the
# last is a feature, the last column is the class id (see `gesture`).
file = np.genfromtxt('word.csv', delimiter=',')
angle = file[:, :-1].astype(np.float32)  # feature matrix
label = file[:, -1].astype(np.float32)   # class id per row

# k-nearest-neighbours model; each row of `angle` is one sample.
knn = cv2.ml.KNearest_create()
knn.train(angle, cv2.ml.ROW_SAMPLE, label)

In [None]:
from PIL import ImageFont, ImageDraw, Image

# Landmark index pairs (parent -> child) for the 20 segments of the 21-point hand.
_SEGMENT_PARENT = [0, 1, 2, 3, 0, 5, 6, 7, 0, 9, 10, 11, 0, 13, 14, 15, 0, 17, 18, 19]
_SEGMENT_CHILD = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]
# Segment index pairs whose enclosed angle is measured (15 angles per hand).
_ANGLE_FIRST = [0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, 16, 17, 18]
_ANGLE_SECOND = [1, 2, 3, 5, 6, 7, 9, 10, 11, 13, 14, 15, 17, 18, 19]

# Unordered pair of pose names (one per detected hand) -> recognized word.
_TWO_HAND_WORDS = {
    frozenset(('Rfist1', 'Lfist1')): '추워요',
    frozenset(('nine', '3')): '고마워요',
    frozenset(('Rfist2', 'Lfist2')): '안녕하세요',
}

# Consecutive failed frame grabs tolerated before giving up on the camera.
_MAX_FAILED_READS = 100


def _joint_angles(landmarks):
    """Return the 15 angles (in degrees) between adjacent hand segments.

    `landmarks` is an iterable of up to 21 objects exposing `.x`, `.y`, `.z`.
    """
    joint = np.zeros((21, 3))
    for j, lm in enumerate(landmarks):
        joint[j] = [lm.x, lm.y, lm.z]

    v = joint[_SEGMENT_CHILD, :] - joint[_SEGMENT_PARENT, :]  # [20, 3]
    v = v / np.linalg.norm(v, axis=1)[:, np.newaxis]          # unit vectors

    cosines = np.einsum('nt,nt->n', v[_ANGLE_FIRST, :], v[_ANGLE_SECOND, :])
    # Rounding can push a dot product of unit vectors slightly outside
    # [-1, 1], where arccos returns NaN; clip before taking the angle.
    return np.degrees(np.arccos(np.clip(cosines, -1.0, 1.0)))  # [15,]


def _draw_word(img, word, font):
    """Return a copy of `img` with `word` drawn at (30, 50) using PIL.

    PIL with a TrueType font is used because cv2.putText's built-in fonts
    have no Hangul glyphs.
    """
    canvas = Image.fromarray(img)  # ndarray -> PIL image
    ImageDraw.Draw(canvas).text((30, 50), word, font=font, fill=(0, 0, 255))
    return np.array(canvas)        # back to an ndarray for OpenCV


def MediaToWord(knn):
    """Recognize signed words from the default webcam until 'q' is pressed.

    Each frame is mirrored, passed through the module-level MediaPipe `hands`
    tracker, and every detected hand is classified with `knn` (3 nearest
    neighbours over 15 joint angles). Class ids are mapped to pose names via
    the module-level `gesture` table. Words are only evaluated on frames in
    which at least two hands were classified.

    Args:
        knn: trained model exposing `findNearest(samples, k)` in the style of
            `cv2.ml.KNearest`.

    Returns:
        list[str]: recognized words in order of appearance. The first element
        is always the '' placeholder. A list is returned even if the camera
        could not be opened or stopped delivering frames.

    Raises:
        OSError: if the font file "fonts/gulim.ttc" cannot be loaded.
    """
    import time  # only needed for the short back-off on failed reads

    # Load the font once, before the camera is opened, instead of per frame.
    font = ImageFont.truetype("fonts/gulim.ttc", 20)

    cap = cv2.VideoCapture(0)
    motion = 0     # 1 after the first pose ('ok') of the two-step "sorry" sign
    Text = ['']    # history of recognized words ('' is a placeholder)
    text = ''      # most recently recognized word
    failed_reads = 0

    try:
        while cap.isOpened():
            grabbed, img = cap.read()
            if not grabbed:
                # Tolerate transient grab failures, but do not spin forever
                # if the camera has stopped delivering frames.
                failed_reads += 1
                if failed_reads >= _MAX_FAILED_READS:
                    break
                time.sleep(0.01)
                continue
            failed_reads = 0

            # Mirror the frame; MediaPipe expects RGB, OpenCV draws in BGR.
            img = cv2.flip(img, 1)
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            result = hands.process(img)
            img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)

            rps_result = []  # pose names classified in this frame, in detection order
            for res in result.multi_hand_landmarks or []:
                data = np.array([_joint_angles(res.landmark)], dtype=np.float32)
                _, results, _, _ = knn.findNearest(data, 3)
                idx = int(results[0][0])

                if idx in gesture:
                    # Label the hand just below landmark 0 (pixel coordinates).
                    org = (int(res.landmark[0].x * img.shape[1]),
                           int(res.landmark[0].y * img.shape[0]))
                    cv2.putText(img, text=gesture[idx].upper(),
                                org=(org[0], org[1] + 20),
                                fontFace=cv2.FONT_HERSHEY_SIMPLEX, fontScale=1,
                                color=(255, 255, 255), thickness=2)
                    rps_result.append(gesture[idx])

                mp_drawing.draw_landmarks(img, res, mp_hands.HAND_CONNECTIONS)

            if len(rps_result) >= 2:
                first, second = rps_result[0], rps_result[1]
                word = _TWO_HAND_WORDS.get(frozenset((first, second)))
                if word is not None:
                    text = word
                # "sorry" is a two-step motion: 'ok' arms it, a later '5' fires it.
                # NOTE(review): this is only evaluated when two hands are
                # visible; the original carried a commented-out one-hand
                # branch here - confirm which behaviour is wanted.
                elif first == 'ok':
                    motion = 1
                elif first == '5' and motion == 1:
                    text = '미안해요'
                    Text.append(text)
                    motion = 0

                img = _draw_word(img, text, font)

                if Text[-1] != text:
                    Text.append(text)

            cv2.imshow('print_word', img)

            # Mask to the low byte so the comparison also works on platforms
            # where waitKey returns extra high bits.
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Always free the camera and close the preview window, including on
        # errors and keyboard interrupts.
        cap.release()
        cv2.destroyAllWindows()

    return Text

In [None]:
# Run the live recognizer; it keeps capturing until 'q' is pressed in the
# preview window.
txt = MediaToWord(knn)

# De-duplicate while keeping first-seen order (dicts preserve insertion order).
# `or []` keeps this cell from raising TypeError should MediaToWord hand back
# None instead of a list.
txtlist = list(dict.fromkeys(txt or []))
txtlist