In [9]:
import numpy as np
import torch
import torch.nn as nn


class LSTMModel(nn.Module):
    """Sequence classifier: a stacked LSTM followed by a linear layer.

    The logits are computed from the LSTM output at the last time step of
    each sequence.

    Args:
        input_size: Number of features per time step.
        hidden_size: Size of the LSTM hidden state.
        num_layers: Number of stacked LSTM layers.
        num_classes: Number of output logits.
    """

    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(LSTMModel, self).__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return class logits of shape (batch, num_classes).

        Args:
            x: Tensor of shape (batch, seq_len, input_size); the LSTM is
                built with ``batch_first=True``.
        """
        # Read the sizes from the LSTM itself rather than from module-level
        # globals, so the model works with whatever it was constructed with.
        state_shape = (self.lstm.num_layers, x.size(0), self.lstm.hidden_size)
        # Zero initial hidden/cell states, created directly on x's device.
        h_0 = torch.zeros(state_shape, device=x.device, dtype=x.dtype)
        c_0 = torch.zeros(state_shape, device=x.device, dtype=x.dtype)
        out, _ = self.lstm(x, (h_0, c_0))
        # Classify from the output of the last time step only.
        return self.fc(out[:, -1, :])


# Run on the GPU when one is available, otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
input_size = 163  # features per frame fed to the LSTM
hidden_size = 128
num_layers = 2
num_classes = 3  # number of sign-word classes
model = LSTMModel(input_size, hidden_size, num_layers, num_classes).to(device)
# weights_only=True restricts unpickling to tensors and plain containers,
# which is all a state dict needs, and avoids executing arbitrary pickled
# code from the checkpoint file.
model.load_state_dict(
    torch.load('sign_language_lstm_model.pth', map_location=device, weights_only=True)
)
model.eval()  # switch to evaluation mode for inference
def pad_keypoints(keypoints, sequence_length):
    """
    Pad a keypoint sequence so it has at least ``sequence_length`` frames.

    An input with zero rows yields an all-zero array of shape
    ``(sequence_length, input_size)``, where ``input_size`` is the
    module-level feature count. A shorter input is extended by repeating its
    last frame. An input that already has ``sequence_length`` rows or more is
    returned unchanged (it is not truncated here).
    """
    frame_count = keypoints.shape[0]

    # No frames at all: there is nothing to repeat, so fall back to zeros.
    if frame_count == 0:
        print("影片數據為空，使用全零矩陣填充。")
        return np.zeros((sequence_length, input_size))

    missing = sequence_length - frame_count
    if missing <= 0:
        return keypoints

    # Repeat the final frame once per missing row and append below the data.
    filler = np.tile(keypoints[-1], (missing, 1))
    return np.vstack([keypoints, filler])


# Load the per-frame keypoint data of one video
def process_video(video_path):
    """
    Load a video's keypoint data from a text file readable by ``np.loadtxt``.

    Each line of the file is one frame of keypoint values.

    Args:
        video_path: Path of the text file to load.

    Returns:
        A 2-D NumPy array of shape (frames, features). If the file cannot be
        loaded, or it contains no data, an empty array of shape
        ``(0, input_size)`` is returned as a placeholder (``input_size`` is
        the module-level feature count).
    """
    try:
        keypoints = np.loadtxt(video_path)
    except Exception as e:
        # Best effort: report the problem and hand back an empty placeholder.
        print(f"無法加載影片數據: {e}")
        return np.empty((0, input_size))

    # An empty file loads as a zero-size array; reshaping it would produce a
    # bogus (1, 0) "frame", so return the same placeholder as a failed load.
    if keypoints.size == 0:
        return np.empty((0, input_size))

    # A single row (1-D) or a single value (0-D) becomes one frame: (1, features).
    if keypoints.ndim < 2:
        keypoints = keypoints.reshape(1, -1)
    return keypoints



# Maps the predicted class index to the sign-language word it stands for.
class_mapping = {0: "下雨", 1: "分數", 2: "工作小組"}


video_path = './video3.txt'
keypoints = process_video(video_path)

# Pad clips that are shorter than the fixed sequence length.
sequence_length = 30
keypoints = pad_keypoints(keypoints, sequence_length)

# Keep only the first sequence_length frames and add a leading batch
# dimension. Reuse `device` so the input always lands where the model is.
input_sequence = keypoints[:sequence_length]
input_tensor = torch.tensor(input_sequence, dtype=torch.float32).unsqueeze(0).to(device)

print(f"處理後的輸入序列形狀: {input_tensor.shape}")

# Inference only: no gradients are needed.
with torch.no_grad():
    output = model(input_tensor)
    predicted_class = torch.argmax(output, dim=1).item()

predicted_word = class_mapping[predicted_class]
print(f"預測的手語單字是: {predicted_word}")




處理後的輸入序列形狀: torch.Size([1, 30, 163])
預測的手語單字是: 分數


  model.load_state_dict(torch.load('sign_language_lstm_model.pth', map_location=device))
