In [None]:
import cv2
import mediapipe as mp
import numpy as np
import pandas
from collections import deque
from PIL import ImageFont, ImageDraw, Image
import time
import pyautogui
import pickle
import platform
import sys
import requests
import xgboost as xgb

from matplotlib import pyplot as plt
import tensorflow as tf
from PIL import ImageFont, ImageDraw, Image
import platform


# File locations used by the finger-spelling pipeline.
model_path = "Model/xgb_model46.json"  # loaded into an xgboost Booster further down
label_path = "./labels.csv"  # read with pandas; detect_ML looks up its "val" column
keyboard_path = "./keyboard.p"  # pickled mapping used by keyboard()

# SECURITY NOTE: pickle.load can execute arbitrary code embedded in the file.
# Only use a keyboard.p that comes from a trusted source.
# KEYBOARD_DICT is indexed by a recognized character in keyboard() to obtain
# the string handed to pyautogui.write.
with open(keyboard_path, 'rb') as f:
    KEYBOARD_DICT = pickle.load(f)
    

# Earlier label set, kept for reference:
# signs = ['0','안녕','소개','만나다','반갑다','나','감사']
# Labels for the sign-word model, indexed by the model's argmax output.
# The main loop never appends the label '0' to the recognized-word list.
signs = ['0','오늘','날씨','좋다','맛있다','식사']
actions = np.array(signs)

FONT_SIZE = 200 # size of the text showing the current recognition result
COLOR = (255, 255, 255) # text colour for recognition results
BCOLOR = (0, 0, 0)  # text colour for the mode title

SPEED_LIMIT = 0.05 # fingertip speed threshold
TIME_FRAME = 0.2 # time interval (seconds) used for the speed calculation

mp_hands = mp.solutions.hands  # hand model
mp_holistic = mp.solutions.holistic  # holistic model
mp_drawing = mp.solutions.drawing_utils  # drawing utils

# Pose landmark indices kept as features: shoulders (11, 12), elbows (13, 14), wrists (15, 16).
# NOTE(review): the original comment labelled 11/13/15 as the right side and
# 12/14/16 as the left; MediaPipe's pose landmark table lists 11/13/15 as left
# and 12/14/16 as right — confirm which naming is intended.
POSE_LANDMARKS = [11, 12, 13, 14, 15, 16]

def mediapipe_detection(image, model):
    """Run a MediaPipe solution on one BGR frame.

    Args:
        image: Frame in OpenCV's BGR channel order.
        model: Object exposing ``process(rgb_image)`` (the callers in this
            file pass MediaPipe ``Holistic`` or ``Hands`` instances).

    Returns:
        A ``(bgr_image, results)`` tuple: the frame converted back to BGR and
        whatever ``model.process`` returned.
    """
    rgb_frame = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # The frame is marked read-only only for the duration of the inference call.
    rgb_frame.flags.writeable = False
    results = model.process(rgb_frame)
    rgb_frame.flags.writeable = True

    return cv2.cvtColor(rgb_frame, cv2.COLOR_RGB2BGR), results

colors = [(245,117,16), (117,245,16), (16,117,245)]
def prob_viz(res, actions, input_frame, colors):
    """Draw one horizontal probability bar per class on a copy of the frame.

    Args:
        res: Sequence of per-class probabilities.
        actions: Sequence of class labels, indexed like ``res``.
        input_frame: Image to annotate; it is copied, not modified.
        colors: Non-empty sequence of colour tuples. The palette is cycled, so
            it may be shorter than ``res`` (previously a fourth class raised
            ``IndexError`` with the three-colour default palette).

    Returns:
        The annotated copy of ``input_frame``.
    """
    output_frame = input_frame.copy()
    for num, prob in enumerate(res):
        top = 60 + num * 40
        # Wrap around the palette instead of indexing past its end.
        bar_color = colors[num % len(colors)]
        cv2.rectangle(output_frame, (0, top), (int(prob * 100), top + 30), bar_color, -1)
        # NOTE(review): OpenCV's Hershey fonts cover ASCII only, so Korean
        # labels will presumably not render here — confirm, or draw via myPutText.
        cv2.putText(output_frame, actions[num], (0, top + 25), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)

    return output_frame

def draw_styled_landmarks(image, results):
    """Overlay the tracked pose points and both hand skeletons on ``image``.

    Args:
        image: BGR frame, drawn on in place.
        results: MediaPipe holistic results providing ``pose_landmarks``,
            ``left_hand_landmarks`` and ``right_hand_landmarks``.
    """
    pose = results.pose_landmarks
    if pose:
        frame_h, frame_w = image.shape[0], image.shape[1]
        # Only the landmarks listed in POSE_LANDMARKS are marked, as filled dots.
        for idx in POSE_LANDMARKS:
            point = pose.landmark[idx]
            center = (int(point.x * frame_w), int(point.y * frame_h))
            cv2.circle(image, center, 5, (80, 22, 10), -1)

    # (landmark list, landmark colour, connection colour) for left then right hand.
    hand_styles = (
        (results.left_hand_landmarks, (121, 22, 76), (121, 44, 250)),
        (results.right_hand_landmarks, (245, 117, 66), (245, 66, 230)),
    )
    for hand, dot_color, line_color in hand_styles:
        mp_drawing.draw_landmarks(
            image,
            hand,
            mp_holistic.HAND_CONNECTIONS,
            mp_drawing.DrawingSpec(color=dot_color, thickness=2, circle_radius=4),
            mp_drawing.DrawingSpec(color=line_color, thickness=2, circle_radius=2),
        )
    
def extract_keypoints(results):
    """Flatten the selected pose points and both hands into one feature vector.

    Args:
        results: MediaPipe holistic results providing ``pose_landmarks``,
            ``left_hand_landmarks`` and ``right_hand_landmarks``.

    Returns:
        1-D array laid out as ``[pose, left hand, right hand]``. Each selected
        pose landmark contributes ``x, y, z, visibility`` and each hand
        landmark ``x, y, z``. A missing part is replaced by zeros
        (``len(POSE_LANDMARKS) * 4`` for the pose, ``21 * 3`` per hand).
    """
    def _hand_vector(hand):
        # Zero vector stands in for a hand that was not detected.
        if not hand:
            return np.zeros(21 * 3)
        return np.array([[pt.x, pt.y, pt.z] for pt in hand.landmark]).flatten()

    if results.pose_landmarks:
        pose_rows = [
            [pt.x, pt.y, pt.z, pt.visibility]
            for idx, pt in enumerate(results.pose_landmarks.landmark)
            if idx in POSE_LANDMARKS
        ]
        pose = np.array(pose_rows).flatten()
    else:
        pose = np.zeros(len(POSE_LANDMARKS) * 4)

    left = _hand_vector(results.left_hand_landmarks)
    right = _hand_vector(results.right_hand_landmarks)
    return np.concatenate([pose, left, right])

# Fonts already loaded by myPutText, keyed by (font_path, font_size), so the
# font file is not re-read from disk on every frame.
_FONT_CACHE = {}


# Draws text (including Korean) on an OpenCV image by going through Pillow.
def myPutText(src, text, pos, font_size, font_color):
    """Render ``text`` onto an image array with a TrueType font.

    This single definition replaces two identical copies of the function.

    Args:
        src: Image as a NumPy array (as produced by OpenCV).
        text: String to draw.
        pos: ``(x, y)`` position passed to Pillow's ``ImageDraw.text``.
        font_size: Font size passed to ``ImageFont.truetype``.
        font_color: Fill colour tuple; it is written to the array channels as
            given, with no BGR/RGB conversion.

    Returns:
        A new NumPy array with the text drawn on it.

    Raises:
        OSError: If the font file cannot be opened by Pillow.
    """
    font_path = 'fonts/gulim.ttc'
    if platform.system() == "Darwin":
        font_path = 'AppleGothic.ttf'

    cache_key = (font_path, font_size)
    font = _FONT_CACHE.get(cache_key)
    if font is None:
        font = ImageFont.truetype(font_path, font_size)
        _FONT_CACHE[cache_key] = font

    img_pil = Image.fromarray(src)
    draw = ImageDraw.Draw(img_pil)
    draw.text(pos, text, font=font, fill=font_color)
    return np.array(img_pil)


# Machine-learning detection function
def detect_ML(input_array):
    """Classify one hand-landmark feature array with the XGBoost model.

    Args:
        input_array: NumPy array of features; it is reshaped to a single row.

    Returns:
        The entry of the ``"val"`` column of ``labels`` selected by the
        highest-scoring class index.
    """
    features = xgb.DMatrix(input_array.reshape(1, -1))
    class_scores = model.predict(features)[0]
    best_idx = np.argmax(class_scores)
    return labels["val"][best_idx]

# Typing function
def keyboard(cur_res_final):
    """Type the recognized character through pyautogui.

    The three compound vowels listed below are typed as their two component
    vowels; every other character is looked up in ``KEYBOARD_DICT`` directly.

    Args:
        cur_res_final: Recognized character; it (or its components) must be a
            key of ``KEYBOARD_DICT``.
    """
    compound_vowels = {
        'ㅚ': ('ㅗ', 'ㅣ'),
        'ㅟ': ('ㅜ', 'ㅣ'),
        'ㅢ': ('ㅡ', 'ㅣ'),
    }
    for jamo in compound_vowels.get(cur_res_final, (cur_res_final,)):
        pyautogui.write(KEYBOARD_DICT[jamo])

# --- Runtime state and model/camera setup for the recognition loop ---

sequence = []  # keypoint vectors fed to the sign-word model; the loop keeps the last 30
predictions = []  # argmax class index of every sign-word prediction made so far
threshold = 0.5  # minimum class probability for a sign-word prediction to be accepted

# Sign-word model: the architecture/model file is loaded first, then weights
# from a separate file are loaded on top of it.
model_sign = tf.keras.models.load_model('my_model.h5')
model_sign.load_weights('model.weights.h5')
model_sign.summary()

cap = cv2.VideoCapture(0)  # camera with index 0
landmark_input = deque()  # not referenced anywhere else in the visible code

cur_res_final = ""  # character recognized on the current frame ("" when none)
prev_res_final = ""  # last character that was typed; used to suppress repeats

labels = pandas.read_csv(label_path, index_col=0)  # label table used by detect_ML
# Plain consonant -> doubled consonant; not referenced anywhere else in the visible code.
double_const = {"ㄱ": "ㄲ", "ㄷ": "ㄸ", "ㅂ": "ㅃ", "ㅅ": "ㅆ", "ㅈ": "ㅉ"}

final_type_store = []  # characters typed since the last request to the letter-combiner service


input_sequence = []  # recognized words shown on the bottom line of the window

# Request 1280x720; the values read back below are what the capture reports.
cap.set(cv2.CAP_PROP_FRAME_WIDTH, 1280)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 720)

WIDTH = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
HEIGHT = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
print('해상도 : {} X {}'.format(WIDTH, HEIGHT))


# Finger-spelling classifier used by detect_ML.
model = xgb.Booster()
model.load_model(model_path)


prev_hand_end = np.full((5, 3), 0.5)  # fingertip coordinates from the previous frame
prev_time = time.time()  # time of the last speed check

last_input_time = time.time()  # time the last character was typed
input_debounce_duration = 1  # minimum seconds between two typed characters

mode = 0  # 0 = sign-word mode, 1 = finger-spelling mode

hands = mp_hands.Hands(min_detection_confidence=0.5, min_tracking_confidence=0.5, max_num_hands=2)
holistic = mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5)

# Main capture/recognition loop.
#   Space toggles between sign-word mode (0) and finger-spelling mode (1).
#   Enter leaves the loop.
# Changes relative to the previous version:
#   * cv2.waitKey is polled once per frame. Polling it twice meant a key press
#     consumed by one call was never seen by the other check, so Space/Enter
#     presses were dropped intermittently.
#   * The letter-combiner request has a timeout and handles network errors, so
#     a stalled or failed request no longer freezes or crashes the loop.
#   * The camera, windows and MediaPipe graphs are released in a finally block.
LETTER_COMBINER_TIMEOUT = 5  # seconds to wait for the letter-combiner service

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            print("카메라 인식 불가")
            break

        if mode == 0:  # sign-word mode
            image, results = mediapipe_detection(frame, holistic)
            draw_styled_landmarks(image, results)

            keypoints = extract_keypoints(results)
            sequence.append(keypoints)
            sequence = sequence[-30:]  # the model consumes a 30-frame window

            if len(sequence) == 30:
                res = model_sign.predict(np.expand_dims(sequence, axis=0))[0]
                best = np.argmax(res)
                predictions.append(best)
                word = actions[best]

                # NOTE(review): np.unique(...)[0] is the *smallest* class index
                # among the last 10 predictions, not a check that all 10 agree.
                # Kept as-is because changing it alters recognition behaviour.
                is_stable = np.unique(predictions[-10:])[0] == best
                if is_stable and res[best] > threshold and word != '0':
                    # Skip a word identical to the one appended last.
                    if len(input_sequence) == 0 or word != input_sequence[-1]:
                        input_sequence.append(word)
                        sequence = []  # start a fresh window after accepting a word
            cur_res_final = ""
        elif mode == 1:  # finger-spelling mode
            image, results = mediapipe_detection(frame, hands)
            if results.multi_hand_landmarks:
                # NOTE(review): with two hands visible, prev_hand_end alternates
                # between hands, so dist mixes both — confirm this is intended.
                for idx, hand_landmarks in enumerate(results.multi_hand_landmarks):
                    mp_drawing.draw_landmarks(
                        image, hand_landmarks, mp_hands.HAND_CONNECTIONS)

                    hand_array = np.array(
                        [[res.x, res.y, res.z] for res in hand_landmarks.landmark])
                    cur_hand_end = hand_array[[4, 8, 12, 16, 20], :]  # the five fingertips
                    hand_array = hand_array[:, :2].flatten()  # classifier uses x, y only

                    dist = np.sqrt(np.sum((cur_hand_end - prev_hand_end)**2, axis=1))
                    cur_time = time.time()
                    if cur_time - prev_time > TIME_FRAME:
                        # NOTE(review): dist is the frame-to-frame displacement
                        # but is divided by the constant TIME_FRAME, not by the
                        # elapsed time — confirm.
                        speed = dist / TIME_FRAME
                        # Classify only when at least one fingertip is below the limit.
                        if any(speed < SPEED_LIMIT):
                            cur_res_final = detect_ML(hand_array)
                            if cur_res_final.isdigit():
                                cur_res_final = ""  # numeric labels are not typed
                        prev_time = time.time()
                    prev_hand_end = cur_hand_end
            else:
                cur_res_final = ""

        # A label may hold several comma-separated values; only the last is used.
        cur_res_final_list = cur_res_final.split(",")
        cur_res_final = cur_res_final_list[-1]

        # Type a character only if it differs from the last one typed and the
        # debounce interval has passed.
        if cur_res_final and cur_res_final != prev_res_final and (time.time() - last_input_time > input_debounce_duration):
            keyboard(cur_res_final)
            final_type_store.append(cur_res_final)
            prev_res_final = cur_res_final
            print(cur_res_final, " ", sep="", end="")
            sys.stdout.flush()
            last_input_time = time.time()

        image = cv2.flip(image, 1)  # mirror the frame horizontally for display

        # Background panels: mode title (top-left) and text area (bottom).
        cv2.rectangle(image, (0, 0), (470, 90), (180, 180, 180), -1)
        cv2.rectangle(image, (0, 600), (1280, 720), (0, 0, 0), -1)

        if mode == 0:
            image = myPutText(image, "수어 인식", (40, 10), 80, BCOLOR)
        else:
            image = myPutText(image, "지문자 인식", (10, 10), 80, BCOLOR)
            image = myPutText(image, ','.join(final_type_store), (10, 605), 50, COLOR)

        image = myPutText(image, cur_res_final, (570, 30), FONT_SIZE, COLOR)
        image = myPutText(image, ','.join(input_sequence), (10, 660), 45, COLOR)

        cv2.imshow("Realtime Sign Language Detection", image)

        # Poll the keyboard exactly once per frame and branch on the result.
        key = cv2.waitKey(10) & 0xFF
        if key == 32:  # Space: toggle mode
            if mode == 1:
                # Leaving finger-spelling mode: send the typed characters to the
                # letter-combiner service and append its reply as a word.
                mode = 0
                cur_res_final = ""
                url = "https://s1r4k3949b.execute-api.ap-northeast-2.amazonaws.com/default/letterCombiner"
                query = ','.join(final_type_store)
                params = {'query': query}
                try:
                    response = requests.get(url, params=params, timeout=LETTER_COMBINER_TIMEOUT)
                except requests.RequestException as err:
                    # Network failure or timeout: report it and keep the loop alive.
                    print("서버 에러 발생", err)
                else:
                    if response.status_code == 200:
                        if response.text != "":
                            input_sequence.append(response.text)
                    else:
                        print("서버 에러 발생", response.status_code)
                final_type_store = []
            else:
                mode = 1
                cur_res_final = ""
        elif key == 13:  # Enter: quit
            break
finally:
    # Always release the camera, windows and MediaPipe graphs, even on error.
    cap.release()
    cv2.destroyAllWindows()
    hands.close()
    holistic.close()

In [None]:
# Scratch cell: send a fixed word list to the words-to-sentence endpoint and
# print the reply. The request has a timeout and handles network errors so the
# cell cannot hang indefinitely or fail with an unhandled connection exception.
final_type_store=['오늘', '날씨', '좋다', '맛있다', '식사'] 


url = "https://b1oy0766n7.execute-api.ap-northeast-2.amazonaws.com/default/words_to_sen"

query = ','.join(final_type_store)  # the endpoint receives the words comma-joined
params = {'query': query}
try:
    response = requests.get(url, params=params, timeout=5)
except requests.RequestException as err:
    # Connection failure or timeout.
    print("서버 에러 발생", err)
else:
    if response.status_code == 200:
        if response.text != "":
            print(response.text)
    else:
        print("서버 에러 발생", response.status_code)
final_type_store=[]