In [3]:
import os
import tempfile
import numpy as np
import pandas as pd
import cv2
import mediapipe as mp
import matplotlib.pyplot as plt
import pickle
import tensorflow as tf
import math
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.layers import Dropout
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.layers import StringLookup
from tensorflow import keras
from tensorflow.keras import regularizers, layers, models, regularizers
from tensorflow.keras.models import Sequential, Model, load_model
from tensorflow.keras.layers import (
    Dense, 
    Input, 
    concatenate,
    Flatten,    
)
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications import EfficientNetB7
from tensorflow.keras.applications.efficientnet import preprocess_input

In [4]:
train_dir = 'data/train'
test_dir = 'data/test'
# 비디오 파일 목록과 태그를 포함하는 리스트를 만드는 함수
def create_data_list(data_dir):
    data_list = []
    # data_dir 안의 각 디렉토리에 대해 반복
    for item in os.listdir(data_dir):
        item_path = os.path.join(data_dir, item)  # 아이템의 전체 경로
        # 해당 경로가 디렉토리인지 확인
        if os.path.isdir(item_path):
            # 디렉토리 내의 모든 파일을 나열
            for file_name in os.listdir(item_path):
                # 파일이 .mp4 파일인지 확인
                if file_name.endswith('.jpg'):
                    # 리스트에 태그와 파일 경로를 추가
                    data_list.append((item, str(data_dir+'/'+item)+'/'+file_name))
    return data_list

# 함수를 사용해서 리스트를 생성
train_list = create_data_list(train_dir)
test_list = create_data_list(test_dir)
# 리스트에서 데이터프레임을 생성
train_df = pd.DataFrame(data=train_list, columns=['tag', 'image_name'])
test_df = pd.DataFrame(data=test_list, columns=['tag', 'image_name'])
# 필요한 경우 열 순서를 수정
train_df = train_df.loc[:, ['tag', 'image_name']]
test_df = test_df.loc[:, ['tag', 'image_name']]
# 데이터프레임을 CSV 파일로 저장
train_file_path = 'train.csv'
test_file_path = 'test.csv'
train_df.to_csv(train_file_path, encoding='utf-8-sig', index=False)
test_df.to_csv(test_file_path, encoding='utf-8-sig', index=False)
train_df = pd.read_csv("train.csv") 
test_df = pd.read_csv("test.csv")
print(f"Total video for training: {len(train_df)}")
print(f"Total video for testing: {len(test_df)}")

Total video for training: 160
Total video for testing: 40


In [5]:
# 손가락 각도 계산 함수
def calculate_angles(hand_landmarks, image_shape):
    joint = np.zeros((21, 3))
    for j, lm in enumerate(hand_landmarks.landmark):
        joint[j] = [lm.x * image_shape[1], lm.y * image_shape[0], lm.z]
    
    # 벡터 계산
    v1 = joint[[0,1,2,3,0,5,6,7,0,9,10,11,0,13,14,15,0,17,18,19],:]
    v2 = joint[[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20],:]
    v = v2 - v1
    # 정규화
    v = v / np.linalg.norm(v, axis=1)[:, np.newaxis]

    # 각도 계산
    angle = np.arccos(np.einsum('nt,nt->n', v[[0,1,2,4,5,6,8,9,10,12,13,14,16,17,18],:], v[[1,2,3,5,6,7,9,10,11,13,14,15,17,18,19],:]))
    angle = np.degrees(angle)
    
    return angle

In [33]:
# 이미지로부터 학습 데이터 생성 및 DataFrame에 추가하는 함수
def create_training_data_from_image(image_path, df, tag):
    mp_hands = mp.solutions.hands
    hands = mp_hands.Hands(static_image_mode=True, max_num_hands=2, min_detection_confidence=0.5)
    
    # 이미지 파일 불러오기
    img = cv2.imread(image_path)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    result = hands.process(img)

    # 양손의 랜드마크가 감지되었는지 확인하고 데이터 처리
    # 각 손에 대해 21개의 랜드마크에서 15개의 각도를 계산하므로 총 30개의 각도 데이터가 필요
    hand_data_row = [0] * 30  # 기본값으로 0으로 채워진 리스트를 생성
    if result.multi_hand_landmarks:
        for i, hand_landmarks in enumerate(result.multi_hand_landmarks):
            # 각도 계산
            angles = calculate_angles(hand_landmarks, img.shape[:2])
            # 각 손에 대한 데이터를 적절한 위치에 넣기
            hand_data_row[i*15:(i+1)*15] = angles

    # 이미지 파일 경로에서 라벨(직전 폴더 이름) 추출
    label = tag

    # DataFrame에 행 추가
    df = df.append(pd.Series(hand_data_row + [label]), ignore_index=True)

    return df

# 빈 DataFrame 초기화
df_hands = pd.DataFrame()

for i in range(len(train_df)):
    df_hands = create_training_data_from_image(train_df.iloc[i]['image_name'], df_hands, train_df.iloc[i]['tag'])


# 컬럼 이름 설정 (30개의 각도 + 1개의 라벨)
angle_columns = [f'angle_{i}' for i in range(30)]
df_hands.columns = angle_columns + ['label']

# 모든 데이터가 추가된 DataFrame을 CSV 파일로 저장
df_hands.to_csv('training_data.csv', index=False)

  df = df.append(pd.Series(hand_data_row + [label]), ignore_index=True)
  df = df.append(pd.Series(hand_data_row + [label]), ignore_index=True)
  df = df.append(pd.Series(hand_data_row + [label]), ignore_index=True)
  df = df.append(pd.Series(hand_data_row + [label]), ignore_index=True)
  df = df.append(pd.Series(hand_data_row + [label]), ignore_index=True)
  df = df.append(pd.Series(hand_data_row + [label]), ignore_index=True)
  df = df.append(pd.Series(hand_data_row + [label]), ignore_index=True)
  df = df.append(pd.Series(hand_data_row + [label]), ignore_index=True)
  df = df.append(pd.Series(hand_data_row + [label]), ignore_index=True)
  df = df.append(pd.Series(hand_data_row + [label]), ignore_index=True)
  df = df.append(pd.Series(hand_data_row + [label]), ignore_index=True)
  df = df.append(pd.Series(hand_data_row + [label]), ignore_index=True)
  df = df.append(pd.Series(hand_data_row + [label]), ignore_index=True)
  df = df.append(pd.Series(hand_data_row + [label]), ignore_inde

  df = df.append(pd.Series(hand_data_row + [label]), ignore_index=True)
  df = df.append(pd.Series(hand_data_row + [label]), ignore_index=True)
  df = df.append(pd.Series(hand_data_row + [label]), ignore_index=True)
  df = df.append(pd.Series(hand_data_row + [label]), ignore_index=True)
  df = df.append(pd.Series(hand_data_row + [label]), ignore_index=True)
  df = df.append(pd.Series(hand_data_row + [label]), ignore_index=True)
  df = df.append(pd.Series(hand_data_row + [label]), ignore_index=True)
  df = df.append(pd.Series(hand_data_row + [label]), ignore_index=True)
  df = df.append(pd.Series(hand_data_row + [label]), ignore_index=True)
  df = df.append(pd.Series(hand_data_row + [label]), ignore_index=True)
  df = df.append(pd.Series(hand_data_row + [label]), ignore_index=True)
  df = df.append(pd.Series(hand_data_row + [label]), ignore_index=True)
  df = df.append(pd.Series(hand_data_row + [label]), ignore_index=True)
  df = df.append(pd.Series(hand_data_row + [label]), ignore_inde

  df = df.append(pd.Series(hand_data_row + [label]), ignore_index=True)
  df = df.append(pd.Series(hand_data_row + [label]), ignore_index=True)
  df = df.append(pd.Series(hand_data_row + [label]), ignore_index=True)
  df = df.append(pd.Series(hand_data_row + [label]), ignore_index=True)
  df = df.append(pd.Series(hand_data_row + [label]), ignore_index=True)
  df = df.append(pd.Series(hand_data_row + [label]), ignore_index=True)
  df = df.append(pd.Series(hand_data_row + [label]), ignore_index=True)
  df = df.append(pd.Series(hand_data_row + [label]), ignore_index=True)
  df = df.append(pd.Series(hand_data_row + [label]), ignore_index=True)
  df = df.append(pd.Series(hand_data_row + [label]), ignore_index=True)
  df = df.append(pd.Series(hand_data_row + [label]), ignore_index=True)
  df = df.append(pd.Series(hand_data_row + [label]), ignore_index=True)
  df = df.append(pd.Series(hand_data_row + [label]), ignore_index=True)
  df = df.append(pd.Series(hand_data_row + [label]), ignore_inde

  df = df.append(pd.Series(hand_data_row + [label]), ignore_index=True)
  df = df.append(pd.Series(hand_data_row + [label]), ignore_index=True)
  df = df.append(pd.Series(hand_data_row + [label]), ignore_index=True)
  df = df.append(pd.Series(hand_data_row + [label]), ignore_index=True)
  df = df.append(pd.Series(hand_data_row + [label]), ignore_index=True)
  df = df.append(pd.Series(hand_data_row + [label]), ignore_index=True)
  df = df.append(pd.Series(hand_data_row + [label]), ignore_index=True)
  df = df.append(pd.Series(hand_data_row + [label]), ignore_index=True)
  df = df.append(pd.Series(hand_data_row + [label]), ignore_index=True)
  df = df.append(pd.Series(hand_data_row + [label]), ignore_index=True)
  df = df.append(pd.Series(hand_data_row + [label]), ignore_index=True)
  df = df.append(pd.Series(hand_data_row + [label]), ignore_index=True)
  df = df.append(pd.Series(hand_data_row + [label]), ignore_index=True)
  df = df.append(pd.Series(hand_data_row + [label]), ignore_inde

  df = df.append(pd.Series(hand_data_row + [label]), ignore_index=True)
  df = df.append(pd.Series(hand_data_row + [label]), ignore_index=True)
  df = df.append(pd.Series(hand_data_row + [label]), ignore_index=True)
  df = df.append(pd.Series(hand_data_row + [label]), ignore_index=True)
  df = df.append(pd.Series(hand_data_row + [label]), ignore_index=True)
  df = df.append(pd.Series(hand_data_row + [label]), ignore_index=True)
  df = df.append(pd.Series(hand_data_row + [label]), ignore_index=True)
  df = df.append(pd.Series(hand_data_row + [label]), ignore_index=True)
  df = df.append(pd.Series(hand_data_row + [label]), ignore_index=True)
  df = df.append(pd.Series(hand_data_row + [label]), ignore_index=True)
  df = df.append(pd.Series(hand_data_row + [label]), ignore_index=True)
  df = df.append(pd.Series(hand_data_row + [label]), ignore_index=True)
  df = df.append(pd.Series(hand_data_row + [label]), ignore_index=True)
  df = df.append(pd.Series(hand_data_row + [label]), ignore_inde

  df = df.append(pd.Series(hand_data_row + [label]), ignore_index=True)
  df = df.append(pd.Series(hand_data_row + [label]), ignore_index=True)


In [35]:
df_hands = pd.read_csv("training_data.csv")

In [37]:
X_train = df_hands.drop('label', axis =1)
y_train = df_hands['label']

In [38]:
X_train = pd.get_dummies(X_train)

In [40]:
from sklearn.preprocessing import MinMaxScaler
mm_scaler = MinMaxScaler()
mm_scaler.fit(X_train)
mm_transform_X_train = mm_scaler.transform(X_train)
X_train = pd.DataFrame(mm_transform_X_train, columns=X_train.columns)

In [42]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import VotingClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.model_selection import GridSearchCV
# knn,tree, rf 등 성능이 존나 구리고 adaboost모델이 그나마 괜찮음
# 다른 부스팅 모델없나 서치 후 여러 모델 불러옴
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from lightgbm import LGBMClassifier
from xgboost import XGBClassifier
from catboost import CatBoostClassifier

In [45]:
"""# 그리드서치 바탕으로 하이퍼파라미터 조정
ada_model = AdaBoostClassifier()
ada_param = {'n_estimators' : [50,150,250],
             'learning_rate' : [0.5,1]
            }
ada_grid = GridSearchCV(ada_model,ada_param,cv = 3)
ada_grid.fit(X_train,y_train)
print('최고 평균 정확도 수치: {:.4f}'.format(ada_grid.best_score_))
print('최적 하이퍼 파라미터: ', ada_grid.best_params_)
# 그리드서치 바탕으로 하이퍼파라미터 조정
final_ada_model = ada_grid.best_estimator_"""
final_Ada_model = AdaBoostClassifier(learning_rate= 0.5,
                                    n_estimators = 250)
final_Ada_model.fit(X_train,y_train)
result = cross_val_score(final_Ada_model,
                          X_train,
                          y_train,
                          cv = 5)
result.mean()

최고 평균 정확도 수치: 0.6068
최적 하이퍼 파라미터:  {'learning_rate': 0.5, 'n_estimators': 250}


0.6875

In [None]:
# 얘는 그리드 서치하다가 컴터가 맛이가려하길래 걍 여기저기서 본 것으로 긁어와서 정함
final_lgbm_model = LGBMClassifier(random_state=200, n_jobs=-1, reg_alpha=0.9, reg_lambda=0.2, n_estimators=200)
final_lgbm_model.fit(X_train, y_train)
result = cross_val_score(final_lgbm_model,
                          X_train,
                          y_train,
                          cv = 5)
result.mean()

In [None]:
"""cb_model = CatBoostClassifier()
cb_param = {"depth" : [4,6,8,10], #트리의 깊이
          "iterations" : [250,100,500,1000], #학습 반복횟수
          "learning_rate" : [0.001,0.01,0.1,0.2,0.3], #학습률
          "l2_leaf_reg" : [2,5,10,20,30], #L2규제 하이퍼파라미터
          "border_count" : [254]
          }
cb_grid = GridSearchCV (cb_model, cb_param, scoring ='accuracy', cv = 3, refit=True, n_jobs=1, verbose=2)
# scoring - 모델 성능을 평가하는데 사용할 지표(정확도)
# refit - 최적 하이퍼파라미터를 찾은 후 모델을 재학습
cb_grid.fit(X_train,y_train)
print('최고 평균 정확도 수치: {:.4f}'.format(cat_grid.best_score_))
print('최적 하이퍼 파라미터: ', cb_grid.best_params_)"""
# 얜 성능이 더 줄어들길래 다 기본값으로 
final_cb_model = CatBoostClassifier()
final_cb_model.fit(X_train, y_train)
result = cross_val_score(final_cb_model,
                          X_train,
                          y_train,
                          cv = 5)
result.mean()
# 0.875

In [None]:
"""gbm_model = GradientBoostingClassifier()
gbm_param = {"max_depth" : [4,6,8,10],
             "learning_rate" : [0.01,0.1,0.3,0.5],
             "n_estimators" : [100,300,500]
            }
gbm_grid = GridSearchCV (gbm_model, gbm_param, scoring ='accuracy', cv = 3, refit=True, n_jobs=1, verbose=2)
gbm_grid.fit(X_train,y_train)
print('최고 평균 정확도 수치: {:.4f}'.format(gbm_grid.best_score_))
print('최적 하이퍼 파라미터: ', gbm_grid.best_params_)

final_gbm_model = gbm_grid.best_estimator_
final_gbm_model.fit(X_train, y_train)
result = cross_val_score(final_gbm_model,
                          X_train,
                          y_train,
                          cv = 5)
result.mean()"""

In [None]:
"""xgb_model = XGBClassifier()
xgb_param = {"max_depth": [10,30,50],
             "min_child_weight" : [1,3,6,10], # 리프노드에 허용되는 최소 가중치 합
             "n_estimators": [200,300,500,1000]
            }
xgb_grid = GridSearchCV (xgb_model, xgb_param, scoring ='accuracy', cv = 3, refit=True, n_jobs=1, verbose=2)
xgb_grid.fit(X_train,y_train)
print('최고 평균 정확도 수치: {:.4f}'.format(xgb_grid.best_score_))
print('최적 하이퍼 파라미터: ', xgb_grid.best_params_)

final_xgb_model = xgb_grid.best_estimator_
final_xgb_model.fit(X_train, y_train)
result = cross_val_score(final_xgb_model,
                          X_train,
                          y_train,
                          cv = 5)
result.mean()"""

In [13]:
# 필요한 Mediapipe 솔루션을 초기화합니다.
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
hands = mp_hands.Hands(max_num_hands=2, min_detection_confidence=0.5, min_tracking_confidence=0.5)

# 비디오 캡처 시작
cap = cv2.VideoCapture(0)
while cap.isOpened():
    ret, img = cap.read()
    if not ret:
        print("Ignoring empty camera frame.")
        continue

    # 이미지 처리
    img = cv2.flip(img, 1)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    result = hands.process(img)
    img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)

    if result.multi_hand_landmarks:
        for res in result.multi_hand_landmarks:
            joint = np.zeros((21, 3))
            for j, lm in enumerate(res.landmark):
                joint[j] = [lm.x, lm.y, lm.z]

            # 벡터 계산
            v1 = joint[[0,1,2,3,0,5,6,7,0,9,10,11,0,13,14,15,0,17,18,19],:]
            v2 = joint[[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20],:]
            v = v2 - v1
            v = v / np.linalg.norm(v, axis=1)[:, np.newaxis]

            # 각도 계산
            angle = np.arccos(np.einsum('nt,nt->n',
                v[[0,1,2,4,5,6,8,9,10,12,13,14,16,17,18],:],
                v[[1,2,3,5,6,7,9,10,11,13,14,15,17,18,19],:]))
            angle = np.degrees(angle)

            # NaN 값이 있을 경우 0으로 대체
            angle = np.nan_to_num(angle)

            # 데이터 차원 맞추기
            data = np.concatenate((angle, [0]*(30-len(angle)))) if len(angle) < 30 else angle
            data = data.reshape(1, -1)  # 예측을 위해 데이터를 적절한 형태로 변환
            predicted_label = knn.predict(data)

            # 예측된 라벨을 화면에 표시
            org = (int(res.landmark[0].x * img.shape[1]), int(res.landmark[0].y * img.shape[0]))
            cv2.putText(img, text=predicted_label[0], org=(org[0], org[1] + 20), 
                        fontFace=cv2.FONT_HERSHEY_SIMPLEX, fontScale=1, color=(255, 255, 255), thickness=2)

            # 손 랜드마크 그리기
            mp_drawing.draw_landmarks(img, res, mp_hands.HAND_CONNECTIONS)

    # 화면에 이미지 표시
    cv2.imshow('img', img)
    if cv2.waitKey(30) == 49:  # 1번 키를 누르면 종료
        break

# 사용 종료 후 자원 해제
cap.release()
cv2.destroyAllWindows()











































































