In [6]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
def set_seed(seed=42):
    torch.manual_seed(seed)
    np.random.seed(seed)
    
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)

set_seed(42)
# load多部影片資料並加上label
def load_and_label_videos(video_files, labels):
    all_data = []
    for video_file, label in zip(video_files, labels):
        data = np.loadtxt(video_file)  
        labeled_data = np.hstack([data, np.full((data.shape[0], 1), label)])  
        all_data.append(labeled_data)
    return np.vstack(all_data)


video_files = ['video1.txt', 'video2.txt', 'video3.txt']  # Mediapipe 輸出的檔案
labels = [0, 1, 2]  # 對應每部影片的標label
data = load_and_label_videos(video_files, labels)
print("合併後的數據形狀：", data.shape)

# 移除靜止 frames
def remove_static_frames(data, threshold=0.01):
    diff = np.linalg.norm(data[1:, :-1] - data[:-1, :-1], axis=1)  
    valid_frames = diff > threshold
    return data[np.insert(valid_frames, 0, True)]

data = remove_static_frames(data)
print("移除靜止 frames 後的數據形狀：", data.shape)


train_data, test_data = train_test_split(data, test_size=0.2, random_state=42)
print("訓練集形狀：", train_data.shape, "測試集形狀：", test_data.shape)


class SignLanguageDataset(Dataset):
    def __init__(self, data, sequence_length=30):
        self.data = data
        self.sequence_length = sequence_length

    def __len__(self):
        return len(self.data) - self.sequence_length

    def __getitem__(self, idx):
        x = self.data[idx:idx + self.sequence_length, :-1]  
        y = self.data[idx + self.sequence_length - 1, -1]  # label
        return torch.tensor(x, dtype=torch.float32), torch.tensor(y, dtype=torch.long)


sequence_length = 30
train_dataset = SignLanguageDataset(train_data, sequence_length)
test_dataset = SignLanguageDataset(test_data, sequence_length)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)


class LSTMModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(LSTMModel, self).__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        h_0 = torch.zeros(num_layers, x.size(0), hidden_size).to(x.device)
        c_0 = torch.zeros(num_layers, x.size(0), hidden_size).to(x.device)
        out, _ = self.lstm(x, (h_0, c_0))
        out = self.fc(out[:, -1, :])  
        return out


input_size = train_data.shape[1] - 1  # 不包括標label
hidden_size = 128
num_layers = 2
num_classes = len(set(labels))  # 手語單字數量
learning_rate = 0.001
num_epochs = 20


device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = LSTMModel(input_size, hidden_size, num_layers, num_classes).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

def train_model(model, train_loader, criterion, optimizer, num_epochs):
    model.train()
    for epoch in range(num_epochs):
        total_loss = 0
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)

            outputs = model(inputs)
            loss = criterion(outputs, labels)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            total_loss += loss.item()
        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {total_loss/len(train_loader):.4f}")

train_model(model, train_loader, criterion, optimizer, num_epochs)

#accuracy
def evaluate_model(model, test_loader):
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    accuracy = correct / total
    print(f"模型準確率: {accuracy * 100:.2f}%")
    return accuracy

evaluate_model(model, test_loader)

# 保存模型
torch.save(model.state_dict(), 'sign_language_lstm_model.pth')
print("模型訓練完成並已保存。")



合併後的數據形狀： (301, 164)
移除靜止 frames 後的數據形狀： (301, 164)
訓練集形狀： (240, 164) 測試集形狀： (61, 164)
Epoch [1/20], Loss: 1.0845
Epoch [2/20], Loss: 1.0614
Epoch [3/20], Loss: 1.0615
Epoch [4/20], Loss: 1.0468
Epoch [5/20], Loss: 1.0244
Epoch [6/20], Loss: 0.9804
Epoch [7/20], Loss: 0.9650
Epoch [8/20], Loss: 0.9249
Epoch [9/20], Loss: 0.9231
Epoch [10/20], Loss: 0.8689
Epoch [11/20], Loss: 0.8409
Epoch [12/20], Loss: 0.7709
Epoch [13/20], Loss: 0.7919
Epoch [14/20], Loss: 0.7542
Epoch [15/20], Loss: 0.7301
Epoch [16/20], Loss: 0.6540
Epoch [17/20], Loss: 0.6147
Epoch [18/20], Loss: 0.5964
Epoch [19/20], Loss: 0.5676
Epoch [20/20], Loss: 0.5112
模型準確率: 93.55%
模型訓練完成並已保存。
