In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
# 한국어_의료_수어_영상 압축 해제
!unzip -qq /content/drive/MyDrive/한국어_의료_수어_영상.zip -d /content/

In [None]:
import os
import cv2
import numpy as np
import tensorflow as tf
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, GlobalAveragePooling2D
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.optimizers import Adam

# 동영상에서 일정 간격으로 프레임을 추출하는 함수
def extract_evenly_spaced_frames(video_path, output_folder, num_frames=30):
    """
    num_frames: 추출할 프레임의 수
    """
    cap = cv2.VideoCapture(video_path)
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    frame_interval = max(1, total_frames // num_frames)

    count = 0
    frame_count = 0
    while cap.isOpened() and count < num_frames:
        ret, frame = cap.read()
        if not ret:
            break
        if frame_count % frame_interval == 0:
            frame_path = os.path.join(output_folder, f"frame_{count:04d}.jpg")
            cv2.imwrite(frame_path, frame)
            count += 1
        frame_count += 1
    cap.release()

# 데이터셋 디렉토리 설정
dataset_dir = '/content/한국어_의료_수어'
frame_dir = '/content/frames'

# 프레임 저장 디렉토리 생성
if not os.path.exists(frame_dir):
    os.makedirs(frame_dir)

# 모든 동영상 파일에 대해 일정한 간격으로 프레임 추출
for video_file in os.listdir(dataset_dir):
    if video_file.endswith('.mp4'):
        video_path = os.path.join(dataset_dir, video_file)
        video_frame_dir = os.path.join(frame_dir, os.path.splitext(video_file)[0])
        if not os.path.exists(video_frame_dir):
            os.makedirs(video_frame_dir)
        extract_evenly_spaced_frames(video_path, video_frame_dir, num_frames=30)  # 고르게 30개의 프레임 추출

# 이미지 데이터 제너레이터 설정 (증강 강화)
datagen = ImageDataGenerator(
    rescale=1./255,
    shear_range=0.2,
    zoom_range=0.2,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    brightness_range=[0.8, 1.2],
    horizontal_flip=True,
    validation_split=0.2
)

# 학습 데이터와 검증 데이터 생성
train_generator = datagen.flow_from_directory(
    frame_dir,
    target_size=(224, 224),  # ResNet 입력 크기
    batch_size=32,
    class_mode='categorical',
    subset='training'
)

validation_generator = datagen.flow_from_directory(
    frame_dir,
    target_size=(224, 224),  # ResNet 입력 크기
    batch_size=32,
    class_mode='categorical',
    subset='validation'
)

# 사전 훈련된 ResNet50 모델 불러오기
base_model = ResNet50(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

# 모델 정의 (ResNet 기반 Fine-tuning)
model = Sequential([
    base_model,
    GlobalAveragePooling2D(),
    Dense(512, activation='relu'),
    Dropout(0.5),
    Dense(train_generator.num_classes, activation='softmax')
])

# ResNet50의 일부 레이어의 동결 해제 (마지막 30개 레이어 학습 가능하게 설정)
for layer in base_model.layers[-30:]:
    layer.trainable = True

# 모델 컴파일 (학습률 조정)
model.compile(optimizer=Adam(learning_rate=1e-5), loss='categorical_crossentropy', metrics=['accuracy'])

# 모델 학습
model.fit(
    train_generator,
    epochs=20,
    validation_data=validation_generator
)

# 예측 함수 정의 (일정한 간격으로 추출한 프레임을 기반으로 예측), 임시 프레임 삭제
def predict_sign_language(video_path, num_frames=30):
    """
    video_path: 예측할 동영상 경로
    num_frames: 예측에 사용할 프레임 수
    """
    predictions = []

    # 동영상에서 일정 간격으로 프레임 추출 및 예측 수행
    cap = cv2.VideoCapture(video_path)
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    frame_interval = max(1, total_frames // num_frames)

    count = 0
    frame_count = 0
    while cap.isOpened() and count < num_frames:
        ret, frame = cap.read()
        if not ret:
            break
        if frame_count % frame_interval == 0:
            img = cv2.resize(frame, (224, 224))
            img = img / 255.0
            img = np.expand_dims(img, axis=0)

            # 예측 수행
            pred = model.predict(img)
            predictions.append(pred)
            count += 1
        frame_count += 1
    cap.release()

    # 가중 평균을 통한 예측 결과 결합
    predictions = np.mean(predictions, axis=0)
    predicted_class_index = np.argmax(predictions)
    class_labels = list(train_generator.class_indices.keys())

    return class_labels[predicted_class_index]


Found 2400 images belonging to 100 classes.
Found 600 images belonging to 100 classes.
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m94765736/94765736[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Epoch 1/20


  self._warn_if_super_not_called()


[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m134s[0m 858ms/step - accuracy: 0.0110 - loss: 4.8802 - val_accuracy: 0.0100 - val_loss: 4.9399
Epoch 2/20
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m56s[0m 706ms/step - accuracy: 0.0236 - loss: 4.5702 - val_accuracy: 0.0100 - val_loss: 5.0928
Epoch 3/20
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m53s[0m 667ms/step - accuracy: 0.0506 - loss: 4.3382 - val_accuracy: 0.0100 - val_loss: 5.1771
Epoch 4/20
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m84s[0m 698ms/step - accuracy: 0.0852 - loss: 4.1262 - val_accuracy: 0.0100 - val_loss: 5.1810
Epoch 5/20
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m80s[0m 675ms/step - accuracy: 0.1344 - loss: 3.8611 - val_accuracy: 0.0067 - val_loss: 5.1183
Epoch 6/20
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m83s[0m 692ms/step - accuracy: 0.1938 - loss: 3.6382 - val_accuracy: 0.0117 - val_loss: 5.1743
Epoch 7/20
[1m75/75[0m [32m━━

In [7]:
# 모델 저장 (최신 Keras 형식)
model.save('sign_language_model.keras')

In [8]:
# 예측 예시
video_path_example = '/content/drive/MyDrive/한국어_의료_수어/궁금하다.mp4'
predicted_label = predict_sign_language(video_path_example, num_frames=30)
print(f"정답: 궁금하다 / 예측된 수어: {predicted_label}")

video_path_example = '/content/drive/MyDrive/한국어_의료_수어/덥다.mp4'
predicted_label = predict_sign_language(video_path_example, num_frames=30)
print(f"정답: 덥다 / 예측된 수어: {predicted_label}")

video_path_example = '/content/drive/MyDrive/한국어_의료_수어/마취.mp4'
predicted_label = predict_sign_language(video_path_example, num_frames=30)
print(f"정답: 마취 / 예측된 수어: {predicted_label}")

video_path_example = '/content/drive/MyDrive/한국어_의료_수어/발.mp4'
predicted_label = predict_sign_language(video_path_example, num_frames=30)
print(f"정답: 발 / 예측된 수어: {predicted_label}")

video_path_example = '/content/drive/MyDrive/한국어_의료_수어/병.mp4'
predicted_label = predict_sign_language(video_path_example, num_frames=30)
print(f"정답: 병 / 예측된 수어: {predicted_label}")

video_path_example = '/content/drive/MyDrive/한국어_의료_수어/세척.mp4'
predicted_label = predict_sign_language(video_path_example, num_frames=30)
print(f"정답: 세척 / 예측된 수어: {predicted_label}")

video_path_example = '/content/drive/MyDrive/한국어_의료_수어/수술.mp4'
predicted_label = predict_sign_language(video_path_example, num_frames=30)
print(f"정답: 수술 / 예측된 수어: {predicted_label}")

video_path_example = '/content/drive/MyDrive/한국어_의료_수어/없다.mp4'
predicted_label = predict_sign_language(video_path_example, num_frames=30)
print(f"정답: 없다 / 예측된 수어: {predicted_label}")

video_path_example = '/content/drive/MyDrive/한국어_의료_수어/춥다.mp4'
predicted_label = predict_sign_language(video_path_example, num_frames=30)
print(f"정답: 춥다 / 예측된 수어: {predicted_label}")

video_path_example = '/content/drive/MyDrive/한국어_의료_수어/피부.mp4'
predicted_label = predict_sign_language(video_path_example, num_frames=30)
print(f"정답: 피부 / 예측된 수어: {predicted_label}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27