In [None]:
import cv2, mediapipe as mp, numpy as np, pandas as pd
from pathlib import Path

# Paths.
# Windows paths must be raw strings: in a normal string literal "\U" starts a
# unicode escape, which makes the whole cell a SyntaxError. The directories are
# derived from csv_path so they are pathlib.Path objects (needed for `/` and
# `.mkdir`) and stay consistent with the training cell.
csv_path     = Path(r"C:\Users\user\Desktop\validation\val_annotations.csv")
video_dir    = csv_path.parent               # the video files live here
features_dir = csv_path.parent / "features"  # one .npy file per clip is written here
features_dir.mkdir(exist_ok=True)


def _frame_features(res):
    """Flatten one Holistic result into a flat list of x, y, z coordinates.

    Reads the pose (33 landmarks), left-hand (21) and right-hand (21) groups,
    in that order. A group that was not detected is replaced by zeros so every
    frame yields the same number of values (3 * (33 + 21 + 21) = 225).
    """
    feats = []
    for pts, cnt in [(res.pose_landmarks, 33),
                     (res.left_hand_landmarks, 21),
                     (res.right_hand_landmarks, 21)]:
        if pts:
            feats += [c for lmpt in pts.landmark for c in (lmpt.x, lmpt.y, lmpt.z)]
        else:
            feats += [0.0] * (3 * cnt)
    return feats


# Initialise Holistic.
mp_holistic = mp.solutions.holistic

# Read the CSV and process every clip listed in it.
df = pd.read_csv(csv_path)

# The context manager closes the MediaPipe graph when the loop finishes or fails.
# NOTE(review): one Holistic instance in video mode is shared by all clips, as in
# the original code; tracking state may carry over between clips — confirm that
# this is acceptable.
with mp_holistic.Holistic(static_image_mode=False) as holistic:
    for _, row in df.iterrows():
        vid_path = video_dir / row["clip_id"]
        cap = cv2.VideoCapture(str(vid_path))
        seq = []
        try:
            while True:
                ret, frame = cap.read()
                if not ret:
                    break
                # OpenCV decodes frames as BGR; convert to RGB before processing.
                rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                seq.append(_frame_features(holistic.process(rgb)))
        finally:
            # Release the capture even if processing a frame raised.
            cap.release()

        if not seq:
            # Missing or unreadable video: do not write an empty feature file.
            print(f"warning: no frames read from {vid_path}, skipping")
            continue

        arr = np.array(seq, dtype=np.float32)      # shape = (T, 225)
        np.save(features_dir / row["clip_id"].replace(".mp4", ".npy"), arr)


In [None]:
import torch, torch.nn as nn, torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import pandas as pd, numpy as np
from pathlib import Path

# Parameters: annotation file, directory with the extracted features, and the
# compute device (GPU when one is available, otherwise CPU).
csv_path = Path(r"C:\Users\user\Desktop\validation\val_annotations.csv")
features_dir = csv_path.parent / "features"
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

#Dataset
class ClipDataset(Dataset):
    """Dataset of per-clip landmark features averaged over time.

    The annotation CSV must contain the columns ``clip_id``, ``gesture`` and
    ``train``. For every row of the requested split, the features are loaded
    from ``features_dir / <clip_id with ".mp4" replaced by ".npy">``.

    Args:
        csv_file: path to the annotation CSV.
        features_dir: directory holding the ``.npy`` feature files
            (``str`` or ``Path``).
        train: value of the ``train`` column selecting the split.
        seq_len: accepted for interface compatibility; currently unused.

    Attributes:
        recs: list of row dicts for the selected split.
        lab2idx: gesture name -> class index. Built from ALL rows of the CSV,
            so the train and validation datasets share one mapping.
    """

    def __init__(self, csv_file, features_dir, train=True, seq_len=32):
        df = pd.read_csv(csv_file)

        # Build the label mapping BEFORE filtering by split. Deriving it from
        # the filtered frame would give train and validation different indices
        # for the same gesture whenever their label sets differ.
        gestures = sorted(df["gesture"].unique())
        self.lab2idx = {g: i for i, g in enumerate(gestures)}

        df = df[df["train"] == train].reset_index(drop=True)
        self.recs = df.to_dict("records")
        # Path() lets callers pass a plain string as well.
        self.features_dir = Path(features_dir)

    def __len__(self):
        return len(self.recs)

    def __getitem__(self, idx):
        """Return ``(features, label)`` for the clip at position ``idx``.

        Raises:
            ValueError: if the stored sequence is not a non-empty 2-D array;
                averaging it would otherwise yield NaN or a wrongly shaped
                sample.
        """
        rec = self.recs[idx]
        path = self.features_dir / rec["clip_id"].replace(".mp4", ".npy")
        seq = np.load(path)
        if seq.ndim != 2 or seq.shape[0] == 0:
            raise ValueError(f"{path} has no frames (array shape {seq.shape})")
        # Average over the time axis so the MLP gets a fixed-size vector.
        x = seq.mean(axis=0)
        y = self.lab2idx[rec["gesture"]]
        return torch.from_numpy(x), torch.tensor(y)

# Datasets and loaders: one dataset per split; only the training loader shuffles.
train_ds, val_ds = (
    ClipDataset(csv_path, features_dir, train=is_train) for is_train in (True, False)
)
tl = DataLoader(dataset=train_ds, batch_size=32, shuffle=True)
vl = DataLoader(dataset=val_ds, batch_size=32, shuffle=False)

# 3) простая MLP-модель
class MLP(nn.Module):
    """Two-layer perceptron: Linear -> ReLU -> Dropout(0.3) -> Linear.

    Args:
        inp: size of the input feature vector.
        hid: width of the hidden layer.
        out: number of output classes; the caller is expected to pass it
            (the default ``None`` is only a placeholder).
    """

    def __init__(self, inp=225, hid=128, out=None):
        super().__init__()
        layers = [
            nn.Linear(inp, hid),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(hid, out),
        ]
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Return the raw class scores (logits) for ``x``."""
        return self.net(x)

# Model, optimiser and loss. The output layer has one unit per known gesture.
num_classes = len(train_ds.lab2idx)
model = MLP(inp=225, hid=256, out=num_classes).to(device)
opt = optim.Adam(model.parameters(), lr=1e-3)
crit = nn.CrossEntropyLoss()

# Training (10 epochs here).
for epoch in range(1, 11):
    model.train()
    total, correct = 0.0, 0
    for x, y in tl:
        x = x.to(device)
        y = y.to(device)

        opt.zero_grad()
        logits = model(x)
        loss = crit(logits, y)
        loss.backward()
        opt.step()

        # The loss is a batch mean, so weight it by the batch size to get a
        # correct per-sample average at the end of the epoch.
        batch_size = x.size(0)
        total += loss.item() * batch_size
        predicted = logits.argmax(1)
        correct += (predicted == y).sum().item()

    n_train = len(train_ds)
    print(f"Epoch {epoch} — Loss: {total/n_train:.4f}, Acc: {correct/n_train:.4f}")

# Validation: count correct predictions with gradients disabled.
model.eval()
correct = 0
with torch.no_grad():
    for x, y in vl:
        x = x.to(device)
        y = y.to(device)
        predicted = model(x).argmax(1)
        correct += (predicted == y).sum().item()
print(f"Val Accuracy: {correct/len(val_ds):.4f}")


In [None]:
#Обучение MLP и вывод ключевых метрик

from sklearn.metrics import accuracy_score, precision_recall_fscore_support, classification_report

# Build the train/validation datasets and their loaders.
# NOTE(review): RSLGestureDataset, ANNOTATIONS_FILE, BATCH_SIZE, MLPClassifier,
# MLP_PARAMS, DEVICE, LEARNING_RATE, EPOCHS and MODEL_PATH are not defined in the
# visible cells — presumably they come from a configuration/definition cell
# elsewhere; confirm they exist before a Restart & Run All.
train_ds = RSLGestureDataset(ANNOTATIONS_FILE, train=True)
val_ds   = RSLGestureDataset(ANNOTATIONS_FILE, train=False)
train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True)
val_loader   = DataLoader(val_ds, batch_size=BATCH_SIZE)

# Model, loss and optimiser.
model = MLPClassifier(**MLP_PARAMS).to(DEVICE)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

for epoch in range(1, EPOCHS + 1):

    # Training
    model.train()
    total_loss, correct = 0.0, 0
    for x, y in train_loader:
        x, y = x.to(DEVICE), y.to(DEVICE)
        optimizer.zero_grad()
        out   = model(x)
        loss  = criterion(out, y)
        loss.backward()
        optimizer.step()
        # Weight the batch-mean loss by the batch size so the epoch average is per sample.
        total_loss += loss.item() * x.size(0)
        correct    += (out.argmax(1) == y).sum().item()
    train_loss = total_loss / len(train_ds)
    train_acc  = correct / len(train_ds)

    # Validation (runs every epoch; all_preds / all_labels are rebuilt each time,
    # so after the loop they hold the predictions of the final epoch).
    model.eval()
    all_preds, all_labels = [], []
    with torch.no_grad():
        for x, y in val_loader:
            x, y = x.to(DEVICE), y.to(DEVICE)
            preds = model(x).argmax(dim=1)
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(y.cpu().numpy())
    val_acc = accuracy_score(all_labels, all_preds)

    print(f"Epoch {epoch}/{EPOCHS} — "
          f"Train loss: {train_loss:.4f}, Train acc: {train_acc:.4f} | "
          f"Val acc: {val_acc:.4f}")

# Save the model weights as they are after the last epoch.
torch.save(model.state_dict(), MODEL_PATH)
print(f"Модель сохранена в {MODEL_PATH}\n")

In [None]:
# Classification report and confusion matrix for the validation predictions
# collected during the final epoch (all_labels / all_preds).

# These two names are used below but are not part of the sklearn.metrics import
# in the training cell, so bring them into scope here.
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

# NOTE(review): assumes train_ds.labels lists the class names ordered by class
# index (position i names class i) — confirm against the dataset class.
target_names = train_ds.labels

# Pin the full set of class indices. Without `labels=`, scikit-learn only uses
# the classes that actually occur in the validation data; if any class is
# missing, the number of classes no longer matches target_names and the report
# raises (or the matrix axes are mislabelled).
class_ids = list(range(len(target_names)))

print("Classification Report:\n")
print(classification_report(all_labels, all_preds, labels=class_ids,
                            target_names=target_names, digits=4))

# Confusion matrix
# NOTE(review): `plt` is expected to be matplotlib.pyplot imported in an earlier
# cell that is not visible here — confirm.
cm = confusion_matrix(all_labels, all_preds, labels=class_ids)
disp = ConfusionMatrixDisplay(cm, display_labels=target_names)
fig, ax = plt.subplots(figsize=(10, 10))
disp.plot(ax=ax, xticks_rotation='vertical')
ax.set_title("Confusion Matrix (validation)")
fig.tight_layout()
plt.show()
