In [1]:
import cv2
import mediapipe as mp
import numpy as np
import os
import tensorflow as tf

2025-01-28 16:25:35.595949: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-01-28 16:25:35.597066: I external/local_tsl/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-01-28 16:25:35.648708: I external/local_tsl/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-01-28 16:25:35.827512: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
import pickle

# NOTE: pickle.load can execute arbitrary code while deserializing;
# only open files that come from a trusted source.

# Load the serialized inputs
with open('X_train.pkl', 'rb') as features_file:
    X_train = pickle.load(features_file)

# Load the serialized labels
with open('y_train.pkl', 'rb') as labels_file:
    y = pickle.load(labels_file)

In [3]:
# Collect the distinct label values present in y
unique_classes = set(y)

In [4]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder

# Encode the labels as integers so they can be counted per class
le = LabelEncoder()
y_encoded = le.fit_transform(y)

# One row per encoded class with its number of instances
unique, counts = np.unique(y_encoded, return_counts=True)
distribution = pd.DataFrame({'Class': unique, 'Count': counts})

# Descriptive statistics of the per-class instance counts
summary = distribution['Count'].describe()

# Display name -> key in the describe() result, in the order they are printed
describe_fields = {
    'Mean Instances per Class': 'mean',
    'Median Instances per Class': '50%',
    'Standard Deviation': 'std',
    'Min Instances in a Class': 'min',
    'Max Instances in a Class': 'max',
    '25th Percentile': '25%',
    '75th Percentile': '75%',
}

summary_info = {
    'Total Classes': len(distribution),
    'Total Instances': distribution['Count'].sum(),
}
summary_info.update(
    {display_name: summary[stat_key] for display_name, stat_key in describe_fields.items()}
)

# Every entry is printed with two decimals, including the integer totals
for display_name, stat_value in summary_info.items():
    print(f"{display_name}: {stat_value:.2f}")

Total Classes: 2001.00
Total Instances: 172002.00
Mean Instances per Class: 85.96
Median Instances per Class: 79.00
Standard Deviation: 37.17
Min Instances in a Class: 16.00
Max Instances in a Class: 541.00
25th Percentile: 61.00
75th Percentile: 101.00


In [5]:
# Wrap the unpickled data in an object-dtype NumPy array (rebinds X_train)
X_train = np.array(X_train, dtype=object) 

In [6]:
# Display the dimensions of the object array built in the previous cell
X_train.shape

(172002, 20, 6)

In [7]:
# Zero-pad every video in X_train up to the frame count of the longest video (max_frames)
max_frames = max(map(len, X_train))
print("Max frames:", max_frames)

for video_data in X_train:
    # range() is empty for videos that already have max_frames frames,
    # so nothing is touched for them
    for pad_index in range(max_frames - len(video_data)):
        video_data.append([0] * 34)

Max frames: 20


In [8]:
def _copy_feature_group(dest, values, width=15):
    """Write the first `width` entries of `values` into `dest` when it holds data.

    `dest` is left untouched (the caller pre-fills it with zeros) when `values`
    is a scalar placeholder or a list/array whose entries are all zero.
    """
    if isinstance(values, (list, np.ndarray)) and not np.all(np.array(values) == 0):
        dest[:] = values[:width]


# Standardize the shape and structure of each video's data in X_train
def resolve_array(X_train, n_frames=None):
    """Flatten every video into a dense ``(n_frames, 34)`` float array.

    Each frame is read as a 6-slot record: slots 0-3 are copied to columns
    0-3, slot 4 fills columns 4-18 and slot 5 fills columns 19-33. Slots 4
    and 5 may be a list/array (its first 15 values are used) or a scalar
    placeholder; scalars and all-zero lists leave their columns at zero.

    Args:
        X_train: iterable of videos; each video is indexable by frame and
            provides at least `n_frames` frames.
        n_frames: number of frames to read per video. Defaults to the
            notebook-level `max_frames` when omitted.

    Returns:
        A list with one ``(n_frames, 34)`` NumPy float array per video.
    """
    if n_frames is None:
        n_frames = max_frames  # notebook-level value set by the padding cell

    new_X_train = []

    for video_data in X_train:  # Iterate through each video
        frames = np.zeros((n_frames, 34))

        for a in range(n_frames):
            frame = video_data[a]
            frames[a, :4] = frame[:4]  # Copy first 4 values directly
            # One pass per frame is enough: the result does not depend on
            # how many times the two groups are copied.
            _copy_feature_group(frames[a, 4:19], frame[4])
            _copy_feature_group(frames[a, 19:34], frame[5])

        new_X_train.append(frames)

    return new_X_train

In [9]:
# Flatten every video into a (max_frames, 34) array; X_train is rebound to the returned list
X_train = resolve_array(X_train)

In [10]:
# Stack the per-video (max_frames, 34) float arrays into one numeric array.
# A float dtype keeps the data in a contiguous numeric buffer; dtype=object
# would store every single value as a separate Python object.
X_train = np.array(X_train, dtype='float32')

In [11]:
from sklearn.preprocessing import LabelEncoder

# Map every label in y to an integer class id
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Reverse lookup: integer class id -> original label
class_labels = dict(enumerate(label_encoder.classes_))

In [12]:
# Cast features to 32-bit floats and class ids to 32-bit integers
X = X_train.astype(np.float32)
y_encoded = y_encoded.astype(np.int32)

In [13]:
# Number of feature rows and number of labels, one per line
print(len(X), len(y_encoded), sep="\n")

172002
172002


In [14]:
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import SMOTE

# NOTE(review): oversampling is applied to the full dataset here, before the
# train/validation split done in the training cell, so synthetic samples can be
# interpolated from rows that later land in the validation set.

# 1. Calculate the number of samples per class
class_counts = np.bincount(y_encoded)

# 2. Duplicate samples for classes with only one sample to ensure minimum count of 2
#    (all duplicates are appended with a single concatenation instead of
#    re-copying the whole dataset once per singleton class)
singleton_classes = np.flatnonzero(class_counts == 1)
if singleton_classes.size > 0:
    singleton_indices = np.array(
        [np.flatnonzero(y_encoded == class_id)[0] for class_id in singleton_classes]
    )
    X = np.concatenate([X, X[singleton_indices]], axis=0)
    y_encoded = np.concatenate([y_encoded, y_encoded[singleton_indices]], axis=0)
    # Refresh the counts so the k_neighbors computation below sees the duplicates
    class_counts = np.bincount(y_encoded)

# 3. Reshape data to 2D for SMOTE application
n_samples, max_frames, M = X.shape
X_reshaped = X.reshape(n_samples, -1)

# 4. Apply SMOTE
#    k_neighbors is capped at 3 and kept below the smallest class size
min_class_samples = class_counts[class_counts > 0].min()
k_neighbors = int(max(min(min_class_samples - 1, 3), 1))
smote = SMOTE(sampling_strategy='auto', k_neighbors=k_neighbors, random_state=42)
X_resampled, y_resampled = smote.fit_resample(X_reshaped, y_encoded)

# 5. Reshape the data back to 3D
X_resampled = X_resampled.reshape(-1, max_frames, M)

print(f"Original dataset shape: {X.shape}")
print(f"Resampled dataset shape: {X_resampled.shape}")
print(f"Original class distribution: {np.bincount(y_encoded)}")
print(f"Resampled class distribution: {np.bincount(y_resampled)}")

Original dataset shape: (172002, 20, 34)
Resampled dataset shape: (1082541, 20, 34)
Original class distribution: [ 64  56  59 ...  59  80 541]
Resampled class distribution: [541 541 541 ... 541 541 541]


In [15]:
import tensorflow as tf

# Define the number of sign language gestures to recognize
N = 2001

# Define the number of input features computed in one frame
M = 34

keras_layers = tf.keras.layers

# Layers in forward order: one LSTM over the frame sequence, then a dense head
layer_stack = [
    keras_layers.Input(shape=(max_frames, M), name='input'),
    # return_sequences changed to False / units increased from 512 to 1024
    keras_layers.LSTM(1024, return_sequences=False),
    keras_layers.Dense(1024, activation=tf.nn.relu),
    keras_layers.Dropout(0.3),
    keras_layers.Dense(256, activation=tf.nn.relu),
    keras_layers.Dense(256, activation=tf.nn.relu),
    keras_layers.Flatten(),
    keras_layers.Dense(N, activation=tf.nn.softmax, name='output'),
]
model = tf.keras.models.Sequential(layer_stack)

model.compile(
    optimizer='adam',
    loss='CategoricalCrossentropy',
    metrics=['accuracy'],
)
model.summary()


2025-01-28 16:40:17.858749: W tensorflow/core/common_runtime/gpu/gpu_device.cc:2251] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...


In [None]:
import tensorflow as tf
from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
import numpy as np

# NOTE(review): X_resampled already contains SMOTE-generated samples, so the
# validation rows selected below are not independent of the training rows;
# validation metrics will be optimistic.

# 1. One-hot encode y_resampled
num_classes = len(np.unique(y_resampled))
y_resampled_one_hot = to_categorical(y_resampled, num_classes=num_classes)

# 2. Choose the train/validation rows first, so the scaler can be fitted on the
#    training rows only. The shuffle depends only on the number of rows and
#    random_state, not on the values being split.
num_samples, num_frames, num_features = X_resampled.shape
train_idx, val_idx = train_test_split(np.arange(num_samples),
                                      test_size=0.2, random_state=42)

# 3. Normalize X_resampled with per-feature min/max learned from the training rows
scaler = MinMaxScaler()
scaler.fit(X_resampled[train_idx].reshape(-1, num_features))
X_resampled_reshaped = X_resampled.reshape(-1, num_features)  # Reshape to (samples * frames, features)
X_resampled_normalized = scaler.transform(X_resampled_reshaped)
X_resampled_normalized = X_resampled_normalized.reshape(num_samples, num_frames, num_features)

# 4. Train-Test Split
X_train, X_val = X_resampled_normalized[train_idx], X_resampled_normalized[val_idx]
y_train, y_val = y_resampled_one_hot[train_idx], y_resampled_one_hot[val_idx]

# Top-3 Accuracy 
top_k_metric = tf.keras.metrics.TopKCategoricalAccuracy(k=3, name='top_3_accuracy')

# Re-compile so the top-3 metric is tracked alongside plain accuracy
model.compile(optimizer='adam', 
              loss='CategoricalCrossentropy', 
              metrics=['accuracy', top_k_metric])

history = model.fit(X_train, y_train, 
                    epochs=30, 
                    batch_size=32, 
                    validation_data=(X_val, y_val))


# Report the metrics recorded for the last epoch
print("Final Training Accuracy: ", history.history['accuracy'][-1])
print("Final Training Top-3 Accuracy: ", history.history['top_3_accuracy'][-1])
print("Final Validation Accuracy: ", history.history['val_accuracy'][-1])
print("Final Validation Top-3 Accuracy: ", history.history['val_top_3_accuracy'][-1])


In [None]:
# Write the trained model to an HDF5 file in the working directory.
# NOTE(review): only the model is saved here; the fitted MinMaxScaler and the
# LabelEncoder class mapping are not persisted by this notebook.
model.save('asl_top3_accuracy_model.h5')  