In [3]:
import cv2
import numpy as np
import tensorflow as tf
import winsound
import time

# Load the Keras model for inference only. compile=False skips restoring the
# optimizer/loss/metrics stored in the file, which model.predict() does not use.
model = tf.keras.models.load_model('keras_model.h5', compile=False)

# Note names (do, re, mi, fa, sol, la, si) mapped to the frequency that
# play_sound() passes to winsound.Beep.
notes = dict(
    do=261,
    re=293,
    mi=329,
    fa=349,
    sol=392,
    la=440,
    si=493,
)

# Multiplier that play_sound() applies to every note frequency.
current_octave = 1

def predict(frame):
    """Classify one frame and return the index of the highest-scoring class.

    The frame is resized to 224x224, given a leading batch axis, converted to
    float32 and divided by 255 before being passed to the module-level
    ``model``.

    Args:
        frame: Image array as returned by ``cap.read()``.

    Returns:
        ``np.argmax`` over the model's prediction array.
    """
    # NOTE(review): the frame is fed in whatever channel order the caller
    # supplies (OpenCV captures are BGR) and scaled by 1/255 -- confirm this
    # matches the preprocessing keras_model.h5 was trained with.
    resized = cv2.resize(frame, (224, 224))
    batch = np.float32(resized[np.newaxis, ...]) / 255.0

    # verbose=0 keeps Keras from printing a progress bar on every call.
    scores = model.predict(batch, verbose=0)
    return np.argmax(scores)

def play_sound(note):
    """Beep the named note for 250 ms.

    Args:
        note: Key into the module-level ``notes`` mapping (e.g. ``"do"``).

    Raises:
        KeyError: If ``note`` is not a key of ``notes``.
    """
    beep_ms = 250  # length of the beep in milliseconds
    hz = notes[note] * current_octave
    # winsound.Beep blocks until the tone has finished playing.
    winsound.Beep(hz, beep_ms)

# Webcam setup
cap = cv2.VideoCapture(0)

# Predicted class index -> key in `notes`. Indices outside this tuple play nothing.
class_to_note = ("do", "re", "mi", "fa", "sol", "la", "si")

prev_time = 0
try:
    while True:
        ret, frame = cap.read()
        if not ret:
            # No frame available (camera missing/busy or stream ended).
            break

        # Gesture recognition, throttled to one prediction every 0.5 seconds.
        current_time = time.time()
        if current_time - prev_time >= 0.5:
            predicted_class = predict(frame)
            print(predicted_class)
            prev_time = current_time

            # Play the note that corresponds to the predicted class, if any.
            if 0 <= predicted_class < len(class_to_note):
                play_sound(class_to_note[predicted_class])

        # Show the frame
        cv2.imshow('Hand Gesture Recognition', frame)

        # Press 'q' in the preview window to quit.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the camera and close the window, even when the loop is left
    # through an exception or a kernel interrupt; otherwise the webcam stays
    # locked until the kernel is restarted.
    cap.release()
    cv2.destroyAllWindows()


0
0
0
2
3
3
3
0
3
3
2
0
0
0
0
0
0
2
0
0
0
0
0
0
0
5
5
2
2
0
0
0
0
0
5
0
0
5
5
5
5
0
0
0
0
0
0
0
0
0
5
0
0
2
0
0


In [25]:
import cv2
import mediapipe as mp
import winsound
import time

# MediaPipe setup: drawing helpers, the hands solution module, and a tracker
# limited to one hand with a minimum detection confidence of 0.7.
mp_drawing = mp.solutions.drawing_utils
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(
    min_detection_confidence=0.7,
    max_num_hands=1,
)

# Gesture key -> note label and beep frequency (do, re, mi, fa, sol, la, si).
# NOTE(review): the last two key names do not literally describe the finger
# states the main loop tests for them (middle+ring folded, ring+pinky folded).
notes = {
    gesture: {"note": label, "frequency": hz}
    for gesture, label, hz in (
        ("thumb_folded", "도", 261),
        ("index_folded", "레", 293),
        ("middle_folded", "미", 329),
        ("ring_folded", "파", 349),
        ("pinky_folded", "솔", 392),
        ("only_thumb_stretched", "라", 440),
        ("only_index_stretched", "시", 493),
    )
}

def play_sound(note_info):
    """Beep a note for 500 ms, then print which note was played.

    Args:
        note_info: Mapping with a ``"frequency"`` entry (passed to
            ``winsound.Beep``) and a ``"note"`` entry (label that is printed).

    Raises:
        KeyError: If either entry is missing; nothing is played in that case.
    """
    # Read both entries up front so a malformed mapping fails before any sound.
    hz, note = note_info["frequency"], note_info["note"]
    beep_ms = 500  # length of the beep in milliseconds
    # winsound.Beep blocks until the tone has finished playing.
    winsound.Beep(hz, beep_ms)
    print(f"Playing sound: {note}")

def is_finger_folded(hand_landmarks, finger_tip, finger_dip):
    """Report whether the tip landmark's y is greater than the reference joint's y.

    Args:
        hand_landmarks: Object whose ``landmark`` attribute can be indexed by
            ``finger_tip`` / ``finger_dip`` and yields items with a ``y`` value.
        finger_tip: Index of the fingertip landmark.
        finger_dip: Index of the joint the tip is compared against.

    Returns:
        True when ``tip.y > joint.y``, otherwise False.
    """
    # NOTE(review): presumably y grows downward in image coordinates, so this
    # only reads as "folded" for an upright hand -- confirm.
    points = hand_landmarks.landmark
    tip_y = points[finger_tip].y
    joint_y = points[finger_dip].y
    return tip_y > joint_y

# Webcam setup
cap = cv2.VideoCapture(0)

# Folded state of (thumb, index, middle, ring, pinky) -> key in `notes`.
# Any combination that is not listed here plays nothing.
gesture_to_note_key = {
    (True, False, False, False, False): "thumb_folded",
    (False, True, False, False, False): "index_folded",
    (False, False, True, False, False): "middle_folded",
    (False, False, False, True, False): "ring_folded",
    (False, False, False, False, True): "pinky_folded",
    (False, False, True, True, False): "only_thumb_stretched",
    (False, False, False, True, True): "only_index_stretched",
}

try:
    while True:
        start_time = time.time()

        ret, frame = cap.read()
        if not ret:
            # No frame available (camera missing/busy or stream ended).
            break

        # Convert to RGB for MediaPipe; the array is marked read-only while it
        # is being processed, then converted back to BGR for drawing/display.
        image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        image.flags.writeable = False
        results = hands.process(image)
        image.flags.writeable = True
        image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

        if results.multi_hand_landmarks:
            for hand_landmarks in results.multi_hand_landmarks:
                mp_drawing.draw_landmarks(image, hand_landmarks, mp_hands.HAND_CONNECTIONS)

                # Check the state of each finger (tip compared with the joint below it).
                thumb_folded = is_finger_folded(hand_landmarks, mp_hands.HandLandmark.THUMB_TIP, mp_hands.HandLandmark.THUMB_IP)
                index_folded = is_finger_folded(hand_landmarks, mp_hands.HandLandmark.INDEX_FINGER_TIP, mp_hands.HandLandmark.INDEX_FINGER_DIP)
                middle_folded = is_finger_folded(hand_landmarks, mp_hands.HandLandmark.MIDDLE_FINGER_TIP, mp_hands.HandLandmark.MIDDLE_FINGER_DIP)
                ring_folded = is_finger_folded(hand_landmarks, mp_hands.HandLandmark.RING_FINGER_TIP, mp_hands.HandLandmark.RING_FINGER_DIP)
                pinky_folded = is_finger_folded(hand_landmarks, mp_hands.HandLandmark.PINKY_TIP, mp_hands.HandLandmark.PINKY_DIP)

                # Look up the exact five-finger pattern; unknown patterns are silent.
                fold_states = (thumb_folded, index_folded, middle_folded, ring_folded, pinky_folded)
                note_key = gesture_to_note_key.get(fold_states)
                if note_key is not None:
                    play_sound(notes[note_key])

        # Show the frame
        cv2.imshow('Hand Gesture Recognition', image)

        # Press 'q' in the preview window to quit.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

        # Pad each iteration out to 0.5 seconds.
        elapsed_time = time.time() - start_time
        time.sleep(max(0, 0.5 - elapsed_time))
finally:
    # Always free the camera, the window and the MediaPipe tracker, even when
    # the loop is left through an exception or a kernel interrupt; otherwise
    # the webcam stays locked until the kernel is restarted.
    cap.release()
    cv2.destroyAllWindows()
    hands.close()


Playing sound: 도
Playing sound: 도
Playing sound: 도
Playing sound: 도
Playing sound: 도
Playing sound: 레
Playing sound: 레
Playing sound: 레
Playing sound: 레
Playing sound: 레
Playing sound: 미
Playing sound: 미
Playing sound: 미
Playing sound: 미
Playing sound: 미
Playing sound: 파
Playing sound: 파
Playing sound: 파
Playing sound: 파
Playing sound: 파
Playing sound: 파
Playing sound: 파
Playing sound: 파
Playing sound: 파
Playing sound: 파
Playing sound: 솔
Playing sound: 솔
Playing sound: 솔
Playing sound: 솔
Playing sound: 솔
Playing sound: 라
Playing sound: 라
Playing sound: 라
Playing sound: 라
Playing sound: 라
Playing sound: 라
Playing sound: 시
Playing sound: 시
Playing sound: 시
Playing sound: 시
Playing sound: 시
Playing sound: 시
