In [3]:
import os
import random
from collections import defaultdict

input_txt = '/kaggle/input/keypoint/videoid_label.txt'
out_train = '/kaggle/working/train.txt'
out_val = '/kaggle/working/val.txt'
out_test = '/kaggle/working/test.txt'

#os.makedirs(out_train, exist_ok=True)
#os.makedirs(out_val, exist_ok=True)
#os.makedirs(out_test, exist_ok=True)
# Đọc file và gom theo nhãn
label_dict = defaultdict(list)
with open(input_txt, 'r') as f:
    for line in f:
        line = line.strip()
        if not line: continue
        video_id, label = line.split()
        label_dict[label].append(video_id)

# Chia tỉ lệ 6:4:4 cho từng nhãn
train_lines, val_lines, test_lines = [], [], []
for label, vids in label_dict.items():
    vids = list(vids)
    random.shuffle(vids)
    n = len(vids)
    n_train = round(n * 0.6)
    n_val = round(n * 0.2)
    n_test = n - n_train - n_val
    train = vids[:n_train]
    val = vids[n_train:n_train+n_val]
    test = vids[n_train+n_val:]
    train_lines.extend([f"{vid} {label}\n" for vid in train])
    val_lines.extend([f"{vid} {label}\n" for vid in val])
    test_lines.extend([f"{vid} {label}\n" for vid in test])

# Shuffle lại từng tập để tránh cùng nhãn đứng liền nhau
random.shuffle(train_lines)
random.shuffle(val_lines)
random.shuffle(test_lines)

# Lưu file
with open(out_train, 'w') as f: f.writelines(train_lines)
with open(out_val, 'w') as f: f.writelines(val_lines)
with open(out_test, 'w') as f: f.writelines(test_lines)

print("Đã chia xong. Train:", len(train_lines), "Val:", len(val_lines), "Test:", len(test_lines))

Đã chia xong. Train: 456 Val: 145 Test: 150


# 1. Pretrain Vision encoder

In [5]:
import os
from glob import glob
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from PIL import Image
from torchvision import transforms
import torchvision
from tqdm import tqdm
import json

In [6]:
def build_label_map(txt_files):
    labels = set()
    for txt in txt_files:
        with open(txt, "r") as f:
            for line in f:
                parts = line.strip().split()
                if len(parts) >= 2:
                    label = ' '.join(parts[1:]).strip()
                    labels.add(label)
    labels = sorted(labels)
    label_map = {lbl: idx for idx, lbl in enumerate(labels)}
    return label_map

In [7]:
data_root = "/kaggle/input/image-mask/cropped_hands"
train_txt = "/kaggle/working/train.txt"
valid_txt = "/kaggle/working/val.txt"
test_txt = "/kaggle/working/test.txt"
batch_size = 32
num_workers = 4
num_epochs = 6
lr = 1e-3
out_ckpt = "/kaggle/working/pretrained_hand_rgb.pth"
out_labelmap = "/kaggle/working/label_map.json"

label_map = build_label_map([train_txt, valid_txt, test_txt])
num_classes = len(label_map)
print("Số lớp:", num_classes)
print("Sample label_map:", dict(list(label_map.items())[:5]))

with open(out_labelmap, "w") as f:
    json.dump(label_map, f)

Số lớp: 32
Sample label_map: {'all': 0, 'before': 1, 'black': 2, 'book': 3, 'candy': 4}


In [10]:
# ==== 2. Dataset for cropped hand images ====
class HandImageDataset(Dataset):
    def __init__(self, data_root, list_file, label_map, transform=None):
        self.samples = []
        self.transform = transform
        with open(list_file, "r") as f:
            for line in f:
                parts = line.strip().split()
                if len(parts) == 2:
                    video_id, label = parts
                    label_idx = label_map[label]
                    #img_dir = os.path.join(data_root, video_id)
                    # All *_left.jpg and *_right.jpg (can add filter for frame sampling if needed)
                    imgs = sorted(glob(os.path.join(data_root, f"{video_id}_*_left.jpg"))) + \
                           sorted(glob(os.path.join(data_root, f"{video_id}_*_right.jpg")))
                    for img_path in imgs:
                        if os.path.isfile(img_path):
                            self.samples.append((img_path, label_idx))
    def __len__(self):
        return len(self.samples)
    def __getitem__(self, idx):
        img_path, label = self.samples[idx]
        img = Image.open(img_path).convert("RGB")
        if self.transform:
            img = self.transform(img)
        return img, label

In [11]:
transform = transforms.Compose([
    transforms.Resize((112, 112)),
    transforms.ColorJitter(0.4, 0.4, 0.4, 0.1),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize([0.485,0.456,0.406], [0.229,0.224,0.225])
])

train_ds = HandImageDataset(data_root, train_txt, label_map, transform=transform)
val_ds = HandImageDataset(data_root, valid_txt, label_map, transform=transform)
train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True, num_workers=num_workers, pin_memory=True)
val_loader = DataLoader(val_ds, batch_size=batch_size, shuffle=False, num_workers=num_workers, pin_memory=True)

print(f"Số ảnh train: {len(train_ds)}, Số ảnh val: {len(val_ds)}")

Số ảnh train: 2305, Số ảnh val: 729


In [12]:
# ==== 3. Vision Model ====
class VisionClassifier(nn.Module):
    def __init__(self, num_classes):
        super().__init__()
        backbone = torchvision.models.efficientnet_b0(pretrained=True)
        self.features = backbone.features
        self.pool = nn.AdaptiveAvgPool2d(1)
        self.classifier = nn.Linear(1280, num_classes)
    def forward(self, x):
        feat = self.features(x)
        feat = self.pool(feat).view(x.size(0), -1)
        return self.classifier(feat)


In [13]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = VisionClassifier(num_classes)
if torch.cuda.device_count() > 1:
    print("Using DataParallel with {} GPUs".format(torch.cuda.device_count()))
    model = nn.DataParallel(model)
model = model.to(device)
optimizer = optim.AdamW(model.parameters(), lr=lr)
criterion = nn.CrossEntropyLoss()

Downloading: "https://download.pytorch.org/models/efficientnet_b0_rwightman-7f5810bc.pth" to /root/.cache/torch/hub/checkpoints/efficientnet_b0_rwightman-7f5810bc.pth
100%|██████████| 20.5M/20.5M [00:00<00:00, 138MB/s]


Using DataParallel with 2 GPUs


In [14]:
best_val_acc = 0
for epoch in range(num_epochs):
    model.train()
    total_loss, correct, total = 0, 0, 0
    for imgs, labels in tqdm(train_loader, desc=f"Train Epoch {epoch+1}", leave=False):
        imgs, labels = imgs.to(device, non_blocking=True), labels.to(device, non_blocking=True)
        logits = model(imgs)
        loss = criterion(logits, labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        total_loss += loss.item() * imgs.size(0)
        preds = logits.argmax(1)
        correct += (preds == labels).sum().item()
        total += imgs.size(0)
    train_acc = correct / total
    train_loss = total_loss / total

    # Validation
    model.eval()
    val_loss, val_correct, val_total = 0, 0, 0
    with torch.no_grad():
        for imgs, labels in tqdm(val_loader, desc="Valid", leave=False):
            imgs, labels = imgs.to(device, non_blocking=True), labels.to(device, non_blocking=True)
            logits = model(imgs)
            loss = criterion(logits, labels)
            val_loss += loss.item() * imgs.size(0)
            preds = logits.argmax(1)
            val_correct += (preds == labels).sum().item()
            val_total += imgs.size(0)
    val_acc = val_correct / val_total
    val_loss = val_loss / val_total

    print(f"Epoch {epoch+1}/{num_epochs} | Train Loss: {train_loss:.4f} Acc: {train_acc:.4f} | "
          f"Val Loss: {val_loss:.4f} Acc: {val_acc:.4f}")

    # Save best
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        torch.save({
            "model": model.state_dict(),
            "label_map": label_map
        }, out_ckpt)
        print(f"Best model saved at epoch {epoch+1}, val_acc={val_acc:.4f}")

print("Done. Best val acc:", best_val_acc)

                                                              

Epoch 1/6 | Train Loss: 3.0402 Acc: 0.1831 | Val Loss: 3.2498 Acc: 0.1948
Best model saved at epoch 1, val_acc=0.1948


                                                              

Epoch 2/6 | Train Loss: 2.3150 Acc: 0.3662 | Val Loss: 3.2723 Acc: 0.1687


                                                              

Epoch 3/6 | Train Loss: 1.8579 Acc: 0.4720 | Val Loss: 3.3247 Acc: 0.2236
Best model saved at epoch 3, val_acc=0.2236


                                                              

Epoch 4/6 | Train Loss: 1.5030 Acc: 0.5774 | Val Loss: 3.6575 Acc: 0.2140


                                                              

Epoch 5/6 | Train Loss: 1.2124 Acc: 0.6560 | Val Loss: 3.5225 Acc: 0.2757
Best model saved at epoch 5, val_acc=0.2757


                                                              

Epoch 6/6 | Train Loss: 0.9412 Acc: 0.7293 | Val Loss: 4.1048 Acc: 0.2606
Done. Best val acc: 0.2757201646090535




# 2. pretrain STGCN

In [16]:
import numpy as np
import torch
from torch.utils.data import Dataset

class PoseSpatialPartDataset(Dataset):
    def __init__(self, data_root, txt_file, label_map, part='body'):
        self.samples = []
        self.label_map = label_map
        self.part = part
        with open(txt_file) as f:
            for line in f:
                parts = line.strip().split()
                if len(parts) < 2:
                    print(f"WARNING: dòng bị lỗi format: {line}")
                    continue
                npy_id = parts[0]
                label = ' '.join(parts[1:]).strip()
                if label not in label_map:
                    print(f"WARNING: label '{label}' chưa có trong label_map!")
                    continue
                npy_path = f"{data_root}/{npy_id}_keypoint.npy"
                self.samples.append((npy_path, int(label_map[label])))
        # Define keypoint slices for each part
        if part == 'body':
            self.idx_start, self.idx_end = 0, 25
        elif part == 'left':
            self.idx_start, self.idx_end = 25, 46
        elif part == 'right':
            self.idx_start, self.idx_end = 46, 67
        else:
            raise ValueError("Unknown part: " + part)

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        npy_path, label = self.samples[idx]
        keypoints = np.load(npy_path)  # (T, 67, 3) expected
        # Auto fix shape if needed
        if keypoints.shape[-2:] == (67, 3):
            pass
        elif keypoints.shape[0] == 67 and keypoints.shape[1] == 3:
            keypoints = np.transpose(keypoints, (2, 0, 1))
        elif keypoints.shape[1] == 3 and keypoints.shape[2] == 67:
            keypoints = np.transpose(keypoints, (0, 2, 1))
        else:
            raise RuntimeError(f"Unrecognized keypoints shape: {keypoints.shape}")
        part_kp = keypoints[:, self.idx_start:self.idx_end, :]  # (T, N, 3)
        if idx == 0:
            print(f"Dataset part_kp.shape: {part_kp.shape}")
        return torch.tensor(part_kp, dtype=torch.float32), label

In [17]:
class SpatialGCNLayer(nn.Module):
    def __init__(self, in_channels, out_channels, A):
        super().__init__()
        self.register_buffer('A', A)
        self.fc = nn.Linear(in_channels, out_channels)
        self.bn = nn.BatchNorm1d(out_channels)

    def forward(self, x):  # x: (B, N, in_channels)
        h = self.fc(x)  # (B, N, out_channels)
        h = h.permute(0, 2, 1)  # (B, out_channels, N)
        h = torch.matmul(h, self.A)  # (B, out_channels, N)
        h = self.bn(h)  # BatchNorm trên out_channels
        h = h.permute(0, 2, 1)  # (B, N, out_channels)
        return torch.relu(h)

In [18]:
class SpatialPoseEncoder(nn.Module):
    def __init__(self, in_channels, num_joints, num_classes, A, hid_dim=128, out_dim=256):
        super().__init__()
        self.gcn1 = SpatialGCNLayer(in_channels, hid_dim, A)
        self.gcn2 = SpatialGCNLayer(hid_dim, out_dim, A)
        self.classifier = nn.Linear(out_dim * num_joints, num_classes)

    def forward(self, x):  # x: (B, T, N, 3)
        #print("Encoder x.shape:", x.shape)
        B, T, N, C = x.shape
        assert N == self.gcn1.A.shape[0], f"x.shape={x.shape}, A.shape={self.gcn1.A.shape}"
        x = x.view(B * T, N, C)  # (B*T, N, 3)
        h = self.gcn1(x)         # (B*T, N, hid_dim)
        h = self.gcn2(h)         # (B*T, N, out_dim)
        h = h.view(B, T, N, -1)  # (B, T, N, out_dim)
        h = h.mean(1)            # (B, N, out_dim)
        h = h.permute(0, 2, 1)   # (B, out_dim, N)
        logits = self.classifier(h.flatten(1))  # (B, num_classes)
        return logits, h

In [19]:
def gather_special_frames(pose_feat, mask_indices):
    """
    pose_feat: (B, C, N, T)
    mask_indices: list of [tensor(F_b,), ...]  # F_b: số frame của mỗi sample cần fusion
    Return: list of (B, C, N, F_b)
    """
    outputs = []
    for b, idxs in enumerate(mask_indices):
        # idxs: (F_b,), pose_feat[b]: (C, N, T)
        sel = pose_feat[b, :, :, idxs]  # (C, N, F_b)
        outputs.append(sel)
    return outputs

In [20]:
def get_spatial_adjacency(num_node, edge):
    A = np.zeros((num_node, num_node))
    for i, j in edge:
        A[i, j] = 1
        A[j, i] = 1
    # Normalize
    Dl = np.sum(A, 0)
    Dn = np.zeros((num_node, num_node))
    for i in range(num_node):
        if Dl[i] > 0:
            Dn[i, i] = Dl[i] ** (-1)
    A_normalized = np.dot(A, Dn)
    return torch.tensor(A_normalized, dtype=torch.float32)

def get_body_spatial_graph():
    # 25 body keypoints (Mediapipe hoặc OpenPose định nghĩa)
    num_node = 25
    self_link = [(i, i) for i in range(num_node)]
    neighbor_link = [
        (0, 1), (1, 2), (2, 3), (3, 7),
        (0, 4), (4, 5), (5, 6), (6, 8),
        (9, 10), (11, 12), (11, 13), (13, 15), (15, 21), (15, 19), (15, 17),
        (17, 19), (11, 23), (12, 14), (14, 16), (16, 18), (16, 20), (16, 22),
        (18, 20), (12, 24), (23, 24)
    ]
    edge = self_link + neighbor_link
    return get_spatial_adjacency(num_node, edge)

def get_left_hand_spatial_graph():
    # 21 left hand keypoints (Mediapipe)
    num_node = 21
    self_link = [(i, i) for i in range(num_node)]
    neighbor_link = [
        (0, 1),(1, 2),(2, 3),(3, 4),
        (0, 5),(5, 6),(6, 7),(7, 8),
        (0, 9),(9, 10),(10, 11),(11, 12),
        (0, 13),(13, 14),(14, 15),(15, 16),
        (0, 17),(17, 18),(18, 19),(19, 20)
    ]
    edge = self_link + neighbor_link
    return get_spatial_adjacency(num_node, edge)
def get_right_hand_spatial_graph():
    # 21 right hand keypoints (Mediapipe)
    num_node = 21
    self_link = [(i, i) for i in range(num_node)]
    neighbor_link = [
        (0, 1),(1, 2),(2, 3),(3, 4),
        (0, 5),(5, 6),(6, 7),(7, 8),
        (0, 9),(9, 10),(10, 11),(11, 12),
        (0, 13),(13, 14),(14, 15),(15, 16),
        (0, 17),(17, 18),(18, 19),(19, 20)
    ]
    edge = self_link + neighbor_link
    return get_spatial_adjacency(num_node, edge)

In [21]:
PART_INFO = {
    'body':  (0, 25),
    'left':  (25, 46),
    'right': (46, 67)
}

In [22]:
import torch
from torch.utils.data import DataLoader
from tqdm import tqdm
import torch.optim as optim
import torch.nn as nn

def train_pose_spatial_part(part, num_joints, get_A_func, best_ckpt_file):
    print(f"\n--- Pretraining {part} ---")
    train_ds = PoseSpatialPartDataset(data_root, train_txt, label_map, part=part)
    val_ds = PoseSpatialPartDataset(data_root, val_txt, label_map, part=part)
    train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True, num_workers=num_workers, pin_memory=True)
    val_loader = DataLoader(val_ds, batch_size=batch_size, shuffle=False, num_workers=num_workers, pin_memory=True)
    print(f"Số mẫu train: {len(train_ds)}, val: {len(val_ds)}")
    print(f"Số batch train: {len(train_loader)}, val: {len(val_loader)}")

    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    A = get_A_func().to(device)
    model = SpatialPoseEncoder(in_channels=3, num_joints=num_joints, num_classes=num_classes, A=A)
    if torch.cuda.device_count() > 1:
        print("Using DataParallel with {} GPUs".format(torch.cuda.device_count()))
        model = nn.DataParallel(model)
    model = model.to(device)
    optimizer = optim.AdamW(model.parameters(), lr=lr)
    criterion = nn.CrossEntropyLoss()
    best_val_acc = 0
    
    for epoch in range(num_epochs):
        model.train()
        total_loss, correct, total = 0, 0, 0
        for x, labels in tqdm(train_loader, desc=f"Train {part} Epoch {epoch+1}", leave=False):
            #print("Batch x.shape:", x.shape)
            x, labels = x.to(device), labels.to(device)
            logits, _ = model(x)
            loss = criterion(logits, labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            total_loss += loss.item() * x.size(0)
            preds = logits.argmax(1)
            correct += (preds == labels).sum().item()
            total += x.size(0)
        if total > 0:
            train_acc = correct / total
            train_loss = total_loss / total
        else:
            train_acc = 0
            train_loss = 0
            print("WARNING: Không có sample nào trong batch train!")
        
        model.eval()
        val_loss, val_correct, val_total = 0, 0, 0
        with torch.no_grad():
            for x, labels in tqdm(val_loader, desc=f"Val {part} Epoch {epoch+1}", leave=False):
                x, labels = x.to(device), labels.to(device)
                logits, _ = model(x)
                loss = criterion(logits, labels)
                val_loss += loss.item() * x.size(0)
                preds = logits.argmax(1)
                val_correct += (preds == labels).sum().item()
                val_total += x.size(0)
        if val_total > 0:
            val_acc = val_correct / val_total
            val_loss = val_loss / val_total
        else:
            val_acc = 0
            val_loss = 0
            print("WARNING: Không có sample nào trong batch val!")

        print(f"[{part}] Epoch {epoch+1}/{num_epochs} | Train Loss: {train_loss:.4f} Acc: {train_acc:.4f} | "
              f"Val Loss: {val_loss:.4f} Acc: {val_acc:.4f}")
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            torch.save({"model": model.state_dict(), "label_map": label_map}, best_ckpt_file)
            print(f"Best model saved at epoch {epoch+1}, val_acc={val_acc:.4f}")

    print(f"Done {part}. Best val acc: {best_val_acc:.4f}")

In [23]:
data_root = "/kaggle/input/keypoint/keypoints"
train_txt = "/kaggle/working/train.txt"
val_txt = "/kaggle/working/val.txt"
test_txt = "/kaggle/working/test.txt"
batch_size = 32
num_workers = 4
num_epochs = 10
lr = 1e-3

label_map = build_label_map([train_txt, val_txt, test_txt])
num_classes = len(label_map)
with open("label_map_pose.json", "w") as f:
    json.dump(label_map, f)

In [24]:
print(get_body_spatial_graph().shape)       # (25, 25)
print(get_left_hand_spatial_graph().shape)  # (21, 21)
print(get_right_hand_spatial_graph().shape) # (21, 21)

torch.Size([25, 25])
torch.Size([21, 21])
torch.Size([21, 21])


In [25]:
train_pose_spatial_part('body',  num_joints=25, get_A_func=get_body_spatial_graph, best_ckpt_file="/kaggle/working/spatial_body_best.pth")
train_pose_spatial_part('left',  num_joints=21, get_A_func=get_left_hand_spatial_graph, best_ckpt_file="/kaggle/working/spatial_left_best.pth")
train_pose_spatial_part('right', num_joints=21, get_A_func=get_right_hand_spatial_graph, best_ckpt_file="/kaggle/working/spatial_right_best.pth")


--- Pretraining body ---
Số mẫu train: 456, val: 145
Số batch train: 15, val: 5
Using DataParallel with 2 GPUs


  return F.linear(input, self.weight, self.bias)
Train body Epoch 1:  40%|████      | 6/15 [00:00<00:00,  9.88it/s]

Dataset part_kp.shape: (64, 25, 3)


Val body Epoch 1:   0%|          | 0/5 [00:00<?, ?it/s]            

Dataset part_kp.shape: (64, 25, 3)


                                                               

[body] Epoch 1/10 | Train Loss: 4.4749 Acc: 0.0482 | Val Loss: 3.3951 Acc: 0.0690
Best model saved at epoch 1, val_acc=0.0690


Train body Epoch 2:   0%|          | 0/15 [00:00<?, ?it/s]

Dataset part_kp.shape: (64, 25, 3)


Val body Epoch 2:   0%|          | 0/5 [00:00<?, ?it/s]           

Dataset part_kp.shape: (64, 25, 3)


                                                               

[body] Epoch 2/10 | Train Loss: 3.3263 Acc: 0.1535 | Val Loss: 3.4384 Acc: 0.0966
Best model saved at epoch 2, val_acc=0.0966


Train body Epoch 3:  60%|██████    | 9/15 [00:00<00:00, 34.18it/s]

Dataset part_kp.shape: (64, 25, 3)


Val body Epoch 3:   0%|          | 0/5 [00:00<?, ?it/s]           

Dataset part_kp.shape: (64, 25, 3)


                                                               

[body] Epoch 3/10 | Train Loss: 3.0046 Acc: 0.1645 | Val Loss: 3.4457 Acc: 0.0828


Train body Epoch 4:   7%|▋         | 1/15 [00:00<00:02,  6.01it/s]

Dataset part_kp.shape: (64, 25, 3)


Val body Epoch 4:   0%|          | 0/5 [00:00<?, ?it/s]           

Dataset part_kp.shape: (64, 25, 3)


                                                               

[body] Epoch 4/10 | Train Loss: 2.7524 Acc: 0.2215 | Val Loss: 3.1755 Acc: 0.1241
Best model saved at epoch 4, val_acc=0.1241


Train body Epoch 5:   7%|▋         | 1/15 [00:00<00:02,  5.66it/s]

Dataset part_kp.shape: (64, 25, 3)


Val body Epoch 5:   0%|          | 0/5 [00:00<?, ?it/s]           

Dataset part_kp.shape: (64, 25, 3)


                                                               

[body] Epoch 5/10 | Train Loss: 2.6069 Acc: 0.2478 | Val Loss: 3.0355 Acc: 0.1931
Best model saved at epoch 5, val_acc=0.1931


Train body Epoch 6:   7%|▋         | 1/15 [00:00<00:02,  5.91it/s]

Dataset part_kp.shape: (64, 25, 3)


Val body Epoch 6:   0%|          | 0/5 [00:00<?, ?it/s]           

Dataset part_kp.shape: (64, 25, 3)


                                                               

[body] Epoch 6/10 | Train Loss: 2.3707 Acc: 0.2763 | Val Loss: 3.0938 Acc: 0.1862


Train body Epoch 7:   0%|          | 0/15 [00:00<?, ?it/s]

Dataset part_kp.shape: (64, 25, 3)


Val body Epoch 7:   0%|          | 0/5 [00:00<?, ?it/s]           

Dataset part_kp.shape: (64, 25, 3)


                                                               

[body] Epoch 7/10 | Train Loss: 2.2551 Acc: 0.3202 | Val Loss: 3.1144 Acc: 0.1724


Train body Epoch 8:   0%|          | 0/15 [00:00<?, ?it/s]

Dataset part_kp.shape: (64, 25, 3)


Val body Epoch 8:   0%|          | 0/5 [00:00<?, ?it/s]           

Dataset part_kp.shape: (64, 25, 3)


                                                               

[body] Epoch 8/10 | Train Loss: 2.2885 Acc: 0.3180 | Val Loss: 3.1482 Acc: 0.1793


Train body Epoch 9:   7%|▋         | 1/15 [00:00<00:02,  6.28it/s]

Dataset part_kp.shape: (64, 25, 3)


Val body Epoch 9:   0%|          | 0/5 [00:00<?, ?it/s]           

Dataset part_kp.shape: (64, 25, 3)


                                                               

[body] Epoch 9/10 | Train Loss: 2.1977 Acc: 0.3333 | Val Loss: 3.1713 Acc: 0.2138
Best model saved at epoch 9, val_acc=0.2138


Train body Epoch 10:   0%|          | 0/15 [00:00<?, ?it/s]

Dataset part_kp.shape: (64, 25, 3)


Val body Epoch 10:   0%|          | 0/5 [00:00<?, ?it/s]           

Dataset part_kp.shape: (64, 25, 3)


                                                                

[body] Epoch 10/10 | Train Loss: 2.0976 Acc: 0.3333 | Val Loss: 2.9866 Acc: 0.2345
Best model saved at epoch 10, val_acc=0.2345
Done body. Best val acc: 0.2345

--- Pretraining left ---
Số mẫu train: 456, val: 145
Số batch train: 15, val: 5
Using DataParallel with 2 GPUs


Train left Epoch 1:   7%|▋         | 1/15 [00:00<00:02,  5.94it/s]

Dataset part_kp.shape: (64, 21, 3)


Val left Epoch 1:   0%|          | 0/5 [00:00<?, ?it/s]            

Dataset part_kp.shape: (64, 21, 3)


                                                               

[left] Epoch 1/10 | Train Loss: 3.9497 Acc: 0.0658 | Val Loss: 3.3844 Acc: 0.0759
Best model saved at epoch 1, val_acc=0.0759


Train left Epoch 2:   0%|          | 0/15 [00:00<?, ?it/s]

Dataset part_kp.shape: (64, 21, 3)


Val left Epoch 2:   0%|          | 0/5 [00:00<?, ?it/s]           

Dataset part_kp.shape: (64, 21, 3)


                                                               

[left] Epoch 2/10 | Train Loss: 3.2880 Acc: 0.1316 | Val Loss: 3.3171 Acc: 0.0759


Train left Epoch 3:  53%|█████▎    | 8/15 [00:00<00:00, 36.04it/s]

Dataset part_kp.shape: (64, 21, 3)


Val left Epoch 3:   0%|          | 0/5 [00:00<?, ?it/s]           

Dataset part_kp.shape: (64, 21, 3)


                                                               

[left] Epoch 3/10 | Train Loss: 3.1386 Acc: 0.1162 | Val Loss: 3.3551 Acc: 0.0897
Best model saved at epoch 3, val_acc=0.0897


Train left Epoch 4:   0%|          | 0/15 [00:00<?, ?it/s]

Dataset part_kp.shape: (64, 21, 3)


Val left Epoch 4:   0%|          | 0/5 [00:00<?, ?it/s]           

Dataset part_kp.shape: (64, 21, 3)


                                                               

[left] Epoch 4/10 | Train Loss: 3.0724 Acc: 0.1469 | Val Loss: 3.3598 Acc: 0.1655
Best model saved at epoch 4, val_acc=0.1655


Train left Epoch 5:  53%|█████▎    | 8/15 [00:00<00:00, 35.79it/s]

Dataset part_kp.shape: (64, 21, 3)


Val left Epoch 5:   0%|          | 0/5 [00:00<?, ?it/s]            

Dataset part_kp.shape: (64, 21, 3)


                                                               

[left] Epoch 5/10 | Train Loss: 2.8416 Acc: 0.1930 | Val Loss: 3.1203 Acc: 0.1724
Best model saved at epoch 5, val_acc=0.1724


Train left Epoch 6:   0%|          | 0/15 [00:00<?, ?it/s]

Dataset part_kp.shape: (64, 21, 3)


Val left Epoch 6:   0%|          | 0/5 [00:00<?, ?it/s]            

Dataset part_kp.shape: (64, 21, 3)


                                                               

[left] Epoch 6/10 | Train Loss: 2.8156 Acc: 0.1820 | Val Loss: 3.3003 Acc: 0.2069
Best model saved at epoch 6, val_acc=0.2069


Train left Epoch 7:   7%|▋         | 1/15 [00:00<00:02,  6.00it/s]

Dataset part_kp.shape: (64, 21, 3)


Val left Epoch 7:   0%|          | 0/5 [00:00<?, ?it/s]           

Dataset part_kp.shape: (64, 21, 3)


                                                               

[left] Epoch 7/10 | Train Loss: 2.7344 Acc: 0.2346 | Val Loss: 3.1155 Acc: 0.1310


Train left Epoch 8:  67%|██████▋   | 10/15 [00:00<00:00, 43.16it/s]

Dataset part_kp.shape: (64, 21, 3)


Val left Epoch 8:   0%|          | 0/5 [00:00<?, ?it/s]            

Dataset part_kp.shape: (64, 21, 3)


                                                               

[left] Epoch 8/10 | Train Loss: 2.5974 Acc: 0.2368 | Val Loss: 3.0968 Acc: 0.1448


Train left Epoch 9:   0%|          | 0/15 [00:00<?, ?it/s]

Dataset part_kp.shape: (64, 21, 3)


Val left Epoch 9:   0%|          | 0/5 [00:00<?, ?it/s]           

Dataset part_kp.shape: (64, 21, 3)


                                                               

[left] Epoch 9/10 | Train Loss: 2.6038 Acc: 0.2281 | Val Loss: 3.0848 Acc: 0.1724


Train left Epoch 10:   7%|▋         | 1/15 [00:00<00:02,  6.01it/s]

Dataset part_kp.shape: (64, 21, 3)


Val left Epoch 10:   0%|          | 0/5 [00:00<?, ?it/s]           

Dataset part_kp.shape: (64, 21, 3)


                                                                

[left] Epoch 10/10 | Train Loss: 2.6018 Acc: 0.2412 | Val Loss: 3.4086 Acc: 0.1448
Done left. Best val acc: 0.2069

--- Pretraining right ---
Số mẫu train: 456, val: 145
Số batch train: 15, val: 5
Using DataParallel with 2 GPUs


Train right Epoch 1:  60%|██████    | 9/15 [00:00<00:00, 37.21it/s]

Dataset part_kp.shape: (64, 21, 3)


Val right Epoch 1:   0%|          | 0/5 [00:00<?, ?it/s]           

Dataset part_kp.shape: (64, 21, 3)


                                                                

[right] Epoch 1/10 | Train Loss: 3.9818 Acc: 0.0680 | Val Loss: 3.4868 Acc: 0.0207
Best model saved at epoch 1, val_acc=0.0207


Train right Epoch 2:   7%|▋         | 1/15 [00:00<00:02,  6.18it/s]

Dataset part_kp.shape: (64, 21, 3)


Val right Epoch 2:   0%|          | 0/5 [00:00<?, ?it/s]           

Dataset part_kp.shape: (64, 21, 3)


                                                                

[right] Epoch 2/10 | Train Loss: 3.1762 Acc: 0.1338 | Val Loss: 3.8750 Acc: 0.0552
Best model saved at epoch 2, val_acc=0.0552


Train right Epoch 3:   0%|          | 0/15 [00:00<?, ?it/s]

Dataset part_kp.shape: (64, 21, 3)


Val right Epoch 3:   0%|          | 0/5 [00:00<?, ?it/s]           

Dataset part_kp.shape: (64, 21, 3)


                                                                

[right] Epoch 3/10 | Train Loss: 2.8577 Acc: 0.1689 | Val Loss: 3.1885 Acc: 0.1172
Best model saved at epoch 3, val_acc=0.1172


Train right Epoch 4:   0%|          | 0/15 [00:00<?, ?it/s]

Dataset part_kp.shape: (64, 21, 3)


Val right Epoch 4:   0%|          | 0/5 [00:00<?, ?it/s]           

Dataset part_kp.shape: (64, 21, 3)


                                                                

[right] Epoch 4/10 | Train Loss: 2.5788 Acc: 0.2697 | Val Loss: 2.7150 Acc: 0.2690
Best model saved at epoch 4, val_acc=0.2690


Train right Epoch 5:   7%|▋         | 1/15 [00:00<00:02,  6.10it/s]

Dataset part_kp.shape: (64, 21, 3)


Val right Epoch 5:   0%|          | 0/5 [00:00<?, ?it/s]           

Dataset part_kp.shape: (64, 21, 3)


                                                                

[right] Epoch 5/10 | Train Loss: 2.4677 Acc: 0.2982 | Val Loss: 2.9977 Acc: 0.2897
Best model saved at epoch 5, val_acc=0.2897


Train right Epoch 6:   0%|          | 0/15 [00:00<?, ?it/s]

Dataset part_kp.shape: (64, 21, 3)


Val right Epoch 6:   0%|          | 0/5 [00:00<?, ?it/s]           

Dataset part_kp.shape: (64, 21, 3)


                                                                

[right] Epoch 6/10 | Train Loss: 2.4150 Acc: 0.3158 | Val Loss: 2.8602 Acc: 0.2069


Train right Epoch 7:   7%|▋         | 1/15 [00:00<00:02,  6.13it/s]

Dataset part_kp.shape: (64, 21, 3)


Val right Epoch 7:   0%|          | 0/5 [00:00<?, ?it/s]           

Dataset part_kp.shape: (64, 21, 3)


                                                                

[right] Epoch 7/10 | Train Loss: 2.2081 Acc: 0.3465 | Val Loss: 2.6691 Acc: 0.2759


Train right Epoch 8:   0%|          | 0/15 [00:00<?, ?it/s]

Dataset part_kp.shape: (64, 21, 3)


Val right Epoch 8:   0%|          | 0/5 [00:00<?, ?it/s]            

Dataset part_kp.shape: (64, 21, 3)


                                                                

[right] Epoch 8/10 | Train Loss: 2.1461 Acc: 0.3816 | Val Loss: 2.6703 Acc: 0.3310
Best model saved at epoch 8, val_acc=0.3310


Train right Epoch 9:   0%|          | 0/15 [00:00<?, ?it/s]

Dataset part_kp.shape: (64, 21, 3)


Val right Epoch 9:   0%|          | 0/5 [00:00<?, ?it/s]           

Dataset part_kp.shape: (64, 21, 3)


                                                                

[right] Epoch 9/10 | Train Loss: 2.0526 Acc: 0.4276 | Val Loss: 2.9096 Acc: 0.2966


Train right Epoch 10:   0%|          | 0/15 [00:00<?, ?it/s]

Dataset part_kp.shape: (64, 21, 3)


Val right Epoch 10:   0%|          | 0/5 [00:00<?, ?it/s]            

Dataset part_kp.shape: (64, 21, 3)


                                                                 

[right] Epoch 10/10 | Train Loss: 2.1560 Acc: 0.3838 | Val Loss: 2.7161 Acc: 0.3172
Done right. Best val acc: 0.3310




# 3. WLASL Module