# Import Library

In [5]:
import json
import os
import random

import cv2
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Cek Direktori

In [2]:
# input_path = '/kaggle/input'
# for dirname, _, filenames in os.walk(input_path):
#     print(f"Folder: {dirname}")
#     for filename in filenames:
#         print(f"- {filename}")

# Persiapan Data

## Fungsi

In [3]:
def select_random_directories(base_dir, class_num):
    """Pick up to ``class_num`` sub-directories of ``base_dir`` at random.

    The candidate directories are sorted before sampling. ``os.listdir``
    returns entries in arbitrary order, so without sorting the same
    ``random.seed`` could still yield a different selection (and therefore a
    different class order) on another machine or file system.

    Args:
        base_dir: Folder whose immediate sub-directories are the candidates.
        class_num: Maximum number of directories to return.

    Returns:
        A list of directory paths (``base_dir`` joined with the entry name),
        in the random order produced by ``random.sample``. Fewer than
        ``class_num`` paths are returned when fewer directories exist.
    """
    all_dirs = sorted(
        os.path.join(base_dir, d)
        for d in os.listdir(base_dir)
        if os.path.isdir(os.path.join(base_dir, d))
    )
    return random.sample(all_dirs, min(class_num, len(all_dirs)))

In [4]:
def select_all_directories(base_dir):
    """Return every immediate sub-directory of ``base_dir`` in sorted order.

    Sorting makes the result independent of the arbitrary order in which
    ``os.listdir`` reports entries, so anything derived from the position of
    a directory in this list stays stable between runs and machines.

    Args:
        base_dir: Folder to scan.

    Returns:
        A sorted list of directory paths (``base_dir`` joined with the entry
        name). Plain files are skipped.
    """
    return sorted(
        os.path.join(base_dir, d)
        for d in os.listdir(base_dir)
        if os.path.isdir(os.path.join(base_dir, d))
    )


In [5]:
def load_video_data(class_dirs, sequence_length, frame_size):
    """Load every usable video under the given class directories.

    Each directory in ``class_dirs`` is one class; its position in the list
    becomes the integer label and its base name becomes the key in the
    returned label map. Every entry inside a class directory is handed to
    ``process_video``; entries for which it returns ``None`` are skipped.

    Args:
        class_dirs: Iterable of class directory paths.
        sequence_length: Forwarded to ``process_video``.
        frame_size: Forwarded to ``process_video``.

    Returns:
        A tuple ``(X, y, label_map)``: the stacked sequences as a float32
        array, the matching integer labels as an array, and a dict mapping
        class name to label index.
    """
    label_map = {}
    clips, clip_labels = [], []

    for class_index, class_dir in enumerate(class_dirs):
        class_name = os.path.basename(class_dir)
        print(f"Processing class {class_index}: {class_name}")
        label_map[class_name] = class_index

        for entry in os.listdir(class_dir):
            clip = process_video(os.path.join(class_dir, entry), sequence_length, frame_size)
            if clip is None:
                continue
            clips.append(clip)
            clip_labels.append(class_index)

    features = np.array(clips, dtype=np.float32)
    targets = np.array(clip_labels)
    return features, targets, label_map


In [6]:
def process_video(video_path, sequence_length, frame_size):
    """Read the first ``sequence_length`` frames of a video as normalised RGB.

    Frames are converted from BGR to RGB, resized with ``cv2.resize`` to
    ``frame_size`` and scaled to the range [0, 1]. Reading stops as soon as
    ``sequence_length`` frames were collected, so only the beginning of the
    video is used.

    Frames are stored as float32 rather than float64: this halves the memory
    held while a data set is being assembled.

    Args:
        video_path: Path handed to ``cv2.VideoCapture``.
        sequence_length: Number of frames to return.
        frame_size: Target size passed to ``cv2.resize`` (OpenCV interprets
            it as ``(width, height)``).

    Returns:
        A float32 array stacking ``sequence_length`` frames, or ``None`` when
        fewer than ``sequence_length`` frames could be read (unreadable file,
        non-video file, or a video that is too short).
    """
    cap = cv2.VideoCapture(video_path)
    frames = []

    # try/finally guarantees the capture handle is released even if a frame
    # fails to convert or resize.
    try:
        while len(frames) < sequence_length:
            ret, frame = cap.read()
            if not ret:
                break

            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frame = cv2.resize(frame, frame_size)
            frames.append(frame.astype(np.float32) / 255.0)
    finally:
        cap.release()

    # The loop never collects more than sequence_length frames, so the only
    # case left to handle is a video that ended too early.
    if len(frames) < sequence_length:
        return None

    return np.array(frames)

In [7]:
def augment_video(video_tensor):
    """Apply a random flip, brightness shift and contrast change to one clip.

    The horizontal flip is decided once for the whole clip.
    ``tf.image.stateless_random_flip_left_right`` treats the leading axis of
    a 4-D tensor as a batch and flips every image independently, which for a
    ``(frames, height, width, channels)`` clip would mirror some frames and
    not others. Reversing the width axis of the entire tensor keeps all
    frames consistent.

    Args:
        video_tensor: Clip tensor whose last three axes are
            ``(height, width, channels)``.

    Returns:
        The augmented tensor with the same shape as the input.
    """
    seed = tf.random.uniform(shape=(), maxval=10000, dtype=tf.int32)
    seed_pair = [seed, seed]

    # One uniform draw per clip: mirror the whole clip with probability 0.5.
    flip_draw = tf.random.stateless_uniform(shape=(), seed=seed_pair)
    video_tensor = tf.cond(
        flip_draw < 0.5,
        lambda: tf.reverse(video_tensor, axis=[-2]),  # axis -2 is the width axis
        lambda: video_tensor,
    )
    video_tensor = tf.image.stateless_random_brightness(video_tensor, max_delta=0.2, seed=seed_pair)
    video_tensor = tf.image.stateless_random_contrast(video_tensor, lower=0.8, upper=1.2, seed=seed_pair)
    return video_tensor

In [8]:
def train_val_split(X, y, val_size, batch_size, random_state=None, augment_fn = None):
    """Shuffle the samples and build batched training / validation datasets.

    Args:
        X: Array of samples, indexed along the first axis.
        y: Array of labels aligned with ``X``.
        val_size: Fraction of the samples placed in the validation set.
        batch_size: Batch size used for both datasets.
        random_state: Optional seed passed to ``np.random.seed`` before
            shuffling (this seeds NumPy's global generator).
        augment_fn: Optional callable applied to every training sample
            (before batching). It is never applied to validation samples.

    Returns:
        A tuple ``(train_dataset, val_dataset)`` of ``tf.data.Dataset``
        objects yielding ``(samples, labels)`` batches, both as float32.
    """
    if random_state is not None:
        np.random.seed(random_state)

    indices = np.arange(len(X))
    np.random.shuffle(indices)

    # Split by an explicit position. Slicing with ``-num_val`` breaks when
    # num_val is 0: ``indices[:-0]`` is empty and ``indices[-0:]`` is
    # everything, which would silently swap the two sets.
    num_val = min(max(int(len(X) * val_size), 0), len(X))
    split_at = len(X) - num_val
    train_indices, val_indices = indices[:split_at], indices[split_at:]

    def generator(indices):
        for idx in indices:
            yield X[idx], y[idx]

    output_signature = (
        tf.TensorSpec(shape=X.shape[1:], dtype=tf.float32),
        tf.TensorSpec(shape=y.shape[1:], dtype=tf.float32),
    )

    train_dataset = tf.data.Dataset.from_generator(
        lambda: generator(train_indices),
        output_signature=output_signature,
    )
    val_dataset = tf.data.Dataset.from_generator(
        lambda: generator(val_indices),
        output_signature=output_signature,
    )

    # Augment individual training samples; validation data stays untouched
    # so that validation metrics remain comparable between epochs.
    if augment_fn is not None:
        train_dataset = train_dataset.map(
            lambda sample, label: (augment_fn(sample), label),
            num_parallel_calls=tf.data.AUTOTUNE,
        )

    train_dataset = train_dataset.batch(batch_size).prefetch(tf.data.AUTOTUNE)
    val_dataset = val_dataset.batch(batch_size).prefetch(tf.data.AUTOTUNE)

    return train_dataset, val_dataset

In [9]:
def get_random_video_info(data, num_samples=5):
    """Describe a few randomly chosen videos of a data set.

    Args:
        data: Tuple ``(X, y)`` of samples and labels, indexed in parallel.
        num_samples: Number of videos to describe. When the data set holds
            fewer videos, all of them are described instead of raising.

    Returns:
        A list of dicts with the keys ``video_index``, ``video_length``
        (size of the first axis of the video), ``frame_size`` (the remaining
        axes of the video), ``first_frame_shape`` and ``label``.
    """
    X, y = data

    # random.sample raises ValueError when asked for more items than exist.
    sample_count = min(num_samples, len(X))
    random_indices = random.sample(range(len(X)), sample_count)

    video_info = []
    for idx in random_indices:
        video = X[idx]
        video_info.append({
            'video_index': idx,
            'video_length': len(video),
            'frame_size': video.shape[1:],
            'first_frame_shape': video[0].shape,
            'label': y[idx]
        })

    return video_info

## Variabel

In [13]:
# Folder whose sub-directories are the candidate classes (one folder per class).
BASE_DIR = 'SL1/'
# Number of class folders to sample; also the size of the model's softmax output.
CLASS_NUM = 140

# Number of frames kept per video.
SEQUENCE_LENGTH = 30
# Target size handed to cv2.resize for every frame.
FRAME_SIZE = (224, 224)

# Fraction of the samples held out for validation.
VAL_SIZE = 0.2
# Seed passed to train_val_split as random_state.
RANDOM_SEED = 21

# Batch size of the training and validation datasets.
BATCH_SIZE = 10
# NOTE(review): not referenced anywhere else in the visible notebook.
BUFFER_SIZE = 1000

# NOTE(review): not referenced anywhere else in the visible notebook.
LABEL_LIST = []

## Proses

In [14]:
# Seed Python's RNG first: the sampled classes AND their order define the
# label indices, so an unseeded draw makes every run produce a different,
# incompatible label mapping.
random.seed(RANDOM_SEED)
class_dirs = select_random_directories(BASE_DIR, CLASS_NUM)

In [None]:
# Decode all videos of the selected classes into arrays; label_map maps each
# class name to the integer label used in y.
X, y, label_map = load_video_data(class_dirs, SEQUENCE_LENGTH, FRAME_SIZE)

Processing class 0: purple
Processing class 1: morning
Processing class 2: child
Processing class 3: doctor
Processing class 4: house
Processing class 5: two
Processing class 6: hello


In [None]:
# Sanity check: print the length, shapes and label of a few random videos.
video_info = get_random_video_info((X, y))
for info in video_info:
    print(f"Video Index: {info['video_index']}")
    print(f"Video Length: {info['video_length']} frames")
    print(f"Frame Size: {info['frame_size']}")
    print(f"First Frame Shape: {info['first_frame_shape']}")
    print(f"Label: {info['label']}")
    print("-" * 50)

In [13]:
# Shuffle with a fixed seed and build the batched training / validation datasets.
train_dataset, val_dataset = train_val_split(X, y, VAL_SIZE, BATCH_SIZE, RANDOM_SEED, augment_video)

# Pelatihan Model

## Fungsi

In [None]:
def create_model(class_num, sequence_length, frame_size):
    """Build a small 3-D convolutional classifier for video clips.

    The network stacks four ``Conv3D`` stages (16, 32, 32, 32 filters), the
    first three followed by 2x2x2 max pooling, then a 96-unit dense layer and
    a softmax over ``class_num`` classes.

    The convolutions use ``padding='same'``. With ``'valid'`` padding every
    convolution removes two positions per axis, so a 30-frame clip shrinks
    30 -> 28 -> 14 -> 12 -> 6 -> 4 -> 2 along the time axis and the fourth
    3x3x3 convolution no longer fits. With ``'same'`` padding only the three
    pooling layers reduce the size (each halves it).

    Args:
        class_num: Number of output classes.
        sequence_length: Number of frames per clip.
        frame_size: Pair of spatial dimensions placed after the time axis in
            the input shape.

    Returns:
        An uncompiled ``keras`` functional model with input shape
        ``(sequence_length, *frame_size, 3)``.

    Raises:
        ValueError: If any input dimension is smaller than 8, because three
            halvings would leave nothing to convolve.
    """
    if min(sequence_length, *frame_size) < 8:
        raise ValueError(
            "sequence_length and both frame_size entries must be at least 8, "
            f"got sequence_length={sequence_length}, frame_size={frame_size}"
        )

    inputs = layers.Input(shape=(sequence_length, *frame_size, 3))

    # (filters, followed_by_pooling) for each convolutional stage.
    conv_stages = ((16, True), (32, True), (32, True), (32, False))

    x = inputs
    for filters, use_pooling in conv_stages:
        x = layers.Conv3D(filters, kernel_size=(3, 3, 3), activation='relu', padding='same')(x)
        if use_pooling:
            x = layers.MaxPooling3D(pool_size=(2, 2, 2))(x)
        x = layers.Dropout(0.2)(x)

    x = layers.Flatten()(x)
    x = layers.Dense(96, activation='relu')(x)
    x = layers.Dropout(0.4)(x)

    outputs = layers.Dense(class_num, activation='softmax')(x)

    model = models.Model(inputs, outputs)
    return model

In [None]:
def train_model(model, train_dataset, val_dataset, batch_size, epochs):
    """Compile ``model`` and fit it on the given datasets.

    The model is compiled with AdamW (learning rate 3e-4, weight decay 5e-2),
    sparse categorical cross-entropy and an accuracy metric.

    Args:
        model: Model to compile and train (modified in place).
        train_dataset: Data passed to ``model.fit`` for training.
        val_dataset: Data passed to ``model.fit`` as ``validation_data``.
        batch_size: Not used by this function.
        epochs: Number of training epochs.

    Returns:
        The object returned by ``model.fit``.
    """
    optimizer = tf.keras.optimizers.AdamW(learning_rate=3e-4, weight_decay=5e-2)
    model.compile(optimizer=optimizer, loss='sparse_categorical_crossentropy', metrics=['accuracy'])

    return model.fit(train_dataset, validation_data=val_dataset, epochs=epochs, callbacks=[])

In [None]:
def plot_history(history):
    """Plot training and validation loss / accuracy side by side.

    Args:
        history: Object whose ``history`` attribute is a mapping containing
            the keys ``loss``, ``val_loss``, ``accuracy`` and
            ``val_accuracy``.
    """
    # Look up every curve before any figure is created.
    curves = {
        key: history.history[key]
        for key in ('loss', 'val_loss', 'accuracy', 'val_accuracy')
    }

    fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(14, 6))

    # (axes, training key, validation key, display name) for each panel.
    panels = (
        (loss_ax, 'loss', 'val_loss', 'Loss'),
        (acc_ax, 'accuracy', 'val_accuracy', 'Accuracy'),
    )
    for panel_ax, train_key, val_key, title in panels:
        panel_ax.plot(curves[train_key], label=f'Training {title}')
        panel_ax.plot(curves[val_key], label=f'Validation {title}')
        panel_ax.set_title(title)
        panel_ax.set_xlabel('Epochs')
        panel_ax.set_ylabel(title)
        panel_ax.legend()

    plt.tight_layout()
    plt.show()

## Variabel

In [None]:
# Number of training epochs passed to train_model.
EPOCHS = 30

## Proses

In [None]:
# Build the classifier for the configured clip geometry and class count.
model = create_model(CLASS_NUM, SEQUENCE_LENGTH, FRAME_SIZE)

# Print the layer-by-layer overview of the model.
model.summary()

In [None]:
# Compile and train the model; history is the value returned by model.fit.
history = train_model(model, train_dataset, val_dataset, BATCH_SIZE, EPOCHS)

In [None]:
# Show the loss and accuracy curves of the training run.
plot_history(history)

In [None]:
model.save("sign_language_video_model.h5")

# The class order comes from a random sample of the class folders, so the
# name -> index mapping must be stored next to the weights. Without it the
# predicted class index cannot be decoded in a later session.
with open("sign_language_label_map.json", "w", encoding="utf-8") as label_map_file:
    json.dump(label_map, label_map_file, ensure_ascii=False, indent=2)

In [None]:
# Show the class name -> label index mapping used for training.
print(label_map) 

In [68]:
# label_map = {'train': 0, 'ready': 1, 'hospital': 2, 'school': 3, 'no': 4, 'again': 5, 'chair': 6, 'tomorrow': 7, 'three': 8, 'tired': 9, 'week': 10, 'year': 11, 'black': 12, 'some': 13, 'drive': 14, 'what': 15, 'stand': 16, 'help': 17, 'good': 18, 'slow': 19, 'baby': 20, 'study': 21, 'sit': 22, 'which': 23, 'please': 24, 'red': 25, 'brown': 26, 'blue': 27, 'clothes': 28, 'busy': 29, 'family': 30, 'two': 31, 'shoes': 32, 'need': 33, 'play': 34, 'door': 35, 'more': 36, 'we': 37, 'understand': 38, 'danger': 39, 'morning': 40, 'wait': 41, 'bus': 42, 'month': 43, 'yes': 44, 'bad': 45, 'yellow': 46, 'sleep': 47, 'maybe': 48, 'write': 49, 'yesterday': 50, 'green': 51, 'doctor': 52, 'thank you': 53, 'emergency': 54, 'sorry': 55, 'sister': 56, 'child': 57, 'table': 58, 'excited': 59, 'laugh': 60, 'drink': 61, 'listen': 62, 'all': 63, 'food': 64, 'fine': 65, 'store': 66, 'when': 67, 'eat': 68, 'sad': 69, 'wake up': 70, 'car': 71, 'person': 72, 'angry': 73, 'night': 74, 'goodbye': 75, 'today': 76, 'come': 77, 'read': 78, 'where': 79, 'pink': 80, 'always': 81, 'work': 82, 'father': 83, 'afternoon': 84, 'safe': 85, 'cry': 86, 'buy': 87, 'now': 88, 'you': 89, 'fast': 90, 'walk': 91, 'purple': 92, 'pain': 93, 'run': 94, 'love': 95, 'later': 96, 'mother': 97, 'none': 98, 'who': 99, 'watch': 100, 'brother': 101, 'four': 102, 'sick': 103, 'i': 104, 'soon': 105, 'phone': 106, 'book': 107, 'office': 108, 'window': 109, 'hate': 110, 'hello': 111, 'white': 112, 'they': 113, 'why': 114, 'home': 115, 'she': 116, 'stop': 117, 'restaurant': 118, 'after': 119, 'before': 120, 'how': 121, 'police': 122, 'five': 123, 'me': 124, 'hurt': 125, 'orange': 126, 'water': 127, 'nervous': 128, 'scared': 129, 'talk': 130, 'house': 131, 'computer': 132, 'friend': 133, 'bank': 134, 'one': 135, 'bored': 136, 'happy': 137, 'less': 138, 'go': 139}

In [1]:
from tensorflow.keras.models import load_model

# Reload the trained model from disk, replacing any model object in the kernel.
# NOTE(review): the cells below also need label_map to be defined in the kernel.
model = load_model("sign_language_video_model.h5")




In [24]:
def preprocess_new_video(video_path, sequence_length=60, frame_size=(112, 112)):
    """Read the first ``sequence_length`` frames of a video for inference.

    Frames are converted from BGR to RGB, resized with ``cv2.resize`` to
    ``frame_size`` and scaled to the range [0, 1] as float32. Reading stops
    once ``sequence_length`` frames were collected, so only the beginning of
    the video is used.

    Args:
        video_path: Path handed to ``cv2.VideoCapture``.
        sequence_length: Number of frames to return.
        frame_size: Target size passed to ``cv2.resize`` (OpenCV interprets
            it as ``(width, height)``).

    Returns:
        A float32 array stacking ``sequence_length`` frames, or ``None`` when
        fewer frames could be read.
    """
    cap = cv2.VideoCapture(video_path)
    frames = []

    # try/finally guarantees the capture handle is released even if a frame
    # fails to convert or resize.
    try:
        while len(frames) < sequence_length:
            ret, frame = cap.read()
            if not ret:
                break
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frame = cv2.resize(frame, frame_size)
            frames.append(frame.astype(np.float32) / 255.0)
    finally:
        cap.release()

    # The loop never collects more than sequence_length frames, so the only
    # case left to handle is a video that ended too early.
    if len(frames) < sequence_length:
        return None  # Ignore videos that are too short

    return np.array(frames)


In [57]:
# Classify a single clip with preprocess_new_video's default sequence length
# and frame size.
# NOTE(review): those defaults have to match the input shape of the loaded model.
video_path = "SL1/hello/hello_komal.mp4"  # Replace with the actual video path
video_data = preprocess_new_video(video_path)

# preprocess_new_video returns None for clips that are too short; nothing is
# predicted in that case.
if video_data is not None:
    video_data = np.expand_dims(video_data, axis=0)  # Add batch dimension
    predictions = model.predict(video_data)
    predicted_class = np.argmax(predictions)
    
    print(f"Predicted Class: {predicted_class}")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 117ms/step
Predicted Class: 111


In [58]:
# Translate the predicted index back into its class name.
class_labels = {v: k for k, v in label_map.items()}  # Reverse the label_map
predicted_label = class_labels[predicted_class]

print(f"Predicted Sign Language Gesture: {predicted_label}")

Predicted Sign Language Gesture: hello


In [62]:
# pip install opencv-python mediapipe tensorflow numpy


In [65]:
def pred(path):
    """Predict and print the sign-language gesture shown in a video file.

    The clip length and frame size are read from the loaded model instead of
    relying on the defaults of ``preprocess_new_video``, so the input always
    has the shape the network expects.

    Relies on the notebook globals ``model`` and ``label_map``.

    Args:
        path: Path of the video to classify.

    Returns:
        The predicted class name, or ``None`` when the video holds too few
        frames to be classified.
    """
    # Assumes the 5-D input (batch, frames, height, width, channels) that
    # create_model builds. cv2.resize expects (width, height), hence the swap.
    _, sequence_length, frame_height, frame_width, _ = model.input_shape
    video_data = preprocess_new_video(path, sequence_length, (frame_width, frame_height))

    if video_data is None:
        # Report the skip instead of returning silently.
        print(f"Could not read {sequence_length} frames from '{path}'; skipping prediction.")
        return None

    video_data = np.expand_dims(video_data, axis=0)  # Add batch dimension
    predictions = model.predict(video_data)
    predicted_class = int(np.argmax(predictions))

    class_labels = {v: k for k, v in label_map.items()}  # Reverse the label_map
    predicted_label = class_labels[predicted_class]

    print(f"Predicted Sign Language Gesture: {predicted_label}")
    return predicted_label

In [66]:
import cv2
import os
import uuid
import time

# Split a video into consecutive segments of `segment_duration` seconds and
# remember the path of every segment file that was started.
cap = cv2.VideoCapture('SL1/hello/hello_komal.mp4')

if not cap.isOpened():
    # Raise instead of calling exit(): exit() would shut down the notebook kernel.
    raise RuntimeError("Error: Could not open video source.")

segment_duration = 3

fps = cap.get(cv2.CAP_PROP_FPS)
if fps <= 0:
    # Without a frame rate the segment length is undefined (and a value of 0
    # would make the modulo below divide by zero).
    cap.release()
    raise RuntimeError("Error: Video source did not report a valid FPS.")

frames_per_segment = max(1, int(fps * segment_duration))
print(f"Frames per segment: {frames_per_segment}")

# The frame geometry does not change between segments, so query it once.
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

output_folder = f'video_segments_{uuid.uuid4()}'
os.makedirs(output_folder, exist_ok=True)

frame_count = 0
segment_count = 0
# NOTE(review): this name shadows the built-in `list`. It is kept because the
# following cell iterates over `list` to classify the segments.
list = []
fourcc = cv2.VideoWriter_fourcc(*'mp4v')

out = None
# try/finally releases the capture and the open writer even when reading or
# writing a frame raises.
try:
    while True:
        ret, frame = cap.read()
        if not ret:
            # Reaching the end of a file is the normal way out of this loop.
            print("Finished reading frames (end of video or capture failure).")
            break
        if frame_count % frames_per_segment == 0:
            # Close the previous segment before starting the next one.
            if out is not None:
                out.release()
            segment_filename = os.path.join(output_folder, f'segment_{segment_count}.mp4')
            out = cv2.VideoWriter(segment_filename, fourcc, fps, (frame_width, frame_height))
            segment_count += 1
            print(f"Started new segment: {segment_filename}")
            list.append(segment_filename)

        out.write(frame)
        frame_count += 1
finally:
    cap.release()
    if out is not None:
        out.release()

print(f"Captured and saved {segment_count} video segments in the '{output_folder}' folder.")            

Frames per segment: 90
Started new segment: video_segments_1588982b-ed03-4909-a786-28bf813eed8f\segment_0.mp4
Error: Failed to capture frame.
Captured and saved 1 video segments in the 'video_segments_1588982b-ed03-4909-a786-28bf813eed8f' folder.


In [67]:
# Classify every saved segment. `list` here is the list of segment paths built
# in the previous cell (it shadows the built-in of the same name).
for l in list:
    pred(l)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 116ms/step
Predicted Sign Language Gesture: hello
