In [1]:
!pip install kagglehub

import kagglehub
import os

# Download dataset
path = kagglehub.dataset_download("orvile/tennis-player-actions-dataset")

print("Dataset files saved to:", path)
print("Files:", os.listdir(path))


Downloading from https://www.kaggle.com/api/v1/datasets/download/orvile/tennis-player-actions-dataset?dataset_version_number=1...


100%|██████████| 503M/503M [00:02<00:00, 179MB/s]

Extracting files...





Dataset files saved to: /root/.cache/kagglehub/datasets/orvile/tennis-player-actions-dataset/versions/1
Files: ['Tennis Player Actions Dataset for Human Pose Estimation']


In [2]:
for root, dirs, files in os.walk(path):
    level = root.replace(path, "").count(os.sep)  # indent based on depth
    indent = " " * 4 * level
    print(f"{indent}{os.path.basename(root)}/")
    subindent = " " * 4 * (level + 1)
    # print files inside annotations folder
    if "annotations" in root:
        for f in files:
            print(f"{subindent}{f}")

1/
    Tennis Player Actions Dataset for Human Pose Estimation/
        annotations/
            ready_position.json
            serve.json
            forehand.json
            backhand.json
        images/
            ready_position/
            serve/
            forehand/
            backhand/


In [16]:
class_names = ["Backhand", "Forehand"]
global_cat2label = {name: idx for idx, name in enumerate(class_names)}

In [21]:
import json
from torch.utils.data import Dataset
import numpy as np
import torch
from torch.utils.data import DataLoader
import torch.nn as nn

class PoseDataset(Dataset):
    def __init__(self, annotation_file, cat2label, transform=None):
        with open(annotation_file, "r") as f:
            self.coco = json.load(f)

        self.images = {img["id"]: img for img in self.coco["images"]}
        self.annotations = self.coco["annotations"]

        self.cat2label = cat2label
        self.transform = transform

    def __len__(self):
        return len(self.annotations)

    def __getitem__(self, idx):
        ann = self.annotations[idx]
        keypoints = np.array(ann["keypoints"]).reshape(-1, 3)[:, :2]
        category = ann["category_id"]

        # Find the category name from the JSON
        cat_name = next(cat["name"] for cat in self.coco["categories"] if cat["id"] == category)
        label = self.cat2label[cat_name]

        keypoints = torch.tensor(keypoints, dtype=torch.float32).flatten()
        label = torch.tensor(label, dtype=torch.long)

        return keypoints, label


In [22]:
class PoseClassifier(nn.Module):
    def __init__(self, num_classes=2, num_keypoints=18):
        super().__init__()
        self.fc = nn.Sequential(
            nn.Linear(num_keypoints * 2, 128), # 17 joints, all with x,y vals
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, num_classes)
        )

    def forward(self, x):
        return self.fc(x)

In [23]:
from torch.utils.data import ConcatDataset
from torch.utils.data import random_split

annotation_dir = os.path.join(path, "Tennis Player Actions Dataset for Human Pose Estimation", "annotations")

class_names = ["Backhand", "Forehand"]
global_cat2label = {name: idx for idx, name in enumerate(class_names)}

datasets_list = []
for fname in ["backhand.json", "forehand.json"]:
    dataset_part = PoseDataset(os.path.join(annotation_dir, fname), cat2label=global_cat2label)
    datasets_list.append(dataset_part)

dataset = ConcatDataset(datasets_list)

train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
train_dataset, val_dataset = random_split(dataset, [train_size, val_size])

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

In [26]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = PoseClassifier(num_classes=2).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

for epoch in range(40):
    model.train()
    total_loss, correct, total = 0, 0, 0

    for X, y in train_loader:
        X, y = X.to(device), y.to(device)

        optimizer.zero_grad()
        outputs = model(X)
        loss = criterion(outputs, y)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
        _, predicted = outputs.max(1)
        correct += (predicted == y).sum().item()
        total += y.size(0)

    train_acc = correct / total

    # validation
    model.eval()
    val_correct, val_total, val_loss = 0, 0, 0
    with torch.no_grad():
        for X, y in val_loader:
            X, y = X.to(device), y.to(device)
            outputs = model(X)
            loss = criterion(outputs, y)
            val_loss += loss.item()

            _, predicted = outputs.max(1)
            val_correct += (predicted == y).sum().item()
            val_total += y.size(0)

    val_acc = val_correct / val_total

    print(f"Epoch {epoch+1}: "
          f"Train Loss {total_loss/len(train_loader):.4f}, Train Acc {train_acc:.4f}, "
          f"Val Loss {val_loss/len(val_loader):.4f}, Val Acc {val_acc:.4f}")

Epoch 1: Train Loss 11.7031, Train Acc 0.6138, Val Loss 7.1057, Val Acc 0.5150
Epoch 2: Train Loss 4.7155, Train Acc 0.6225, Val Loss 3.3524, Val Acc 0.4900
Epoch 3: Train Loss 2.0592, Train Acc 0.7350, Val Loss 1.8185, Val Acc 0.6800
Epoch 4: Train Loss 1.9573, Train Acc 0.6825, Val Loss 0.8598, Val Acc 0.7700
Epoch 5: Train Loss 1.2033, Train Acc 0.7425, Val Loss 0.5176, Val Acc 0.8550
Epoch 6: Train Loss 0.8959, Train Acc 0.7625, Val Loss 0.4731, Val Acc 0.8450
Epoch 7: Train Loss 0.7618, Train Acc 0.7788, Val Loss 0.4962, Val Acc 0.8550
Epoch 8: Train Loss 0.5693, Train Acc 0.7987, Val Loss 0.4082, Val Acc 0.8900
Epoch 9: Train Loss 0.6438, Train Acc 0.7738, Val Loss 0.6316, Val Acc 0.7150
Epoch 10: Train Loss 0.4890, Train Acc 0.8100, Val Loss 0.3510, Val Acc 0.9000
Epoch 11: Train Loss 0.5043, Train Acc 0.8125, Val Loss 0.2991, Val Acc 0.8850
Epoch 12: Train Loss 0.3682, Train Acc 0.8375, Val Loss 0.2942, Val Acc 0.9100
Epoch 13: Train Loss 0.4025, Train Acc 0.8400, Val Loss 0.45

In [29]:
torch.save(model.state_dict(), "new_pose_classifier.pth")

In [36]:
# test model with keypoints
import torch
import numpy as np

# Example MediaPipe landmarks mapped to COCO order
mapped_landmarks = [
    (0.6435, 0.5777),  # nose
    (0.6419, 0.5665),  # left_eye
    (0.6464, 0.5722),  # right_eye
    (0.6349, 0.5679),  # left_ear
    (0.6432, 0.5792),  # right_ear
    (0.6016, 0.5876),  # left_shoulder
    (0.6296, 0.6395),  # right_shoulder
    (0.6015, 0.6370),  # left_elbow
    (0.6580, 0.7169),  # right_elbow
    (0.6401, 0.6691),  # left_wrist
    (0.6997, 0.7613),  # right_wrist
    (0.5526, 0.7405),  # left_hip
    (0.5679, 0.7619),  # right_hip
    (0.5841, 0.8262),  # left_knee
    (0.5996, 0.8609),  # right_knee
    (0.5792, 0.9532),  # left_ankle
    (0.5829, 0.9725),  # right_ankle
    (0.6156, 0.6135),  # neck (avg shoulders in practice)
]
converted_landmarkes = []

# convert landmarks to 1280 x 720
for x,y in mapped_landmarks:
    x = int(x * 1280)
    y = int(y * 720)
    converted_landmarkes.append((x,y))

x = np.array(converted_landmarkes).flatten()  # shape (36,)
x_tensor = torch.tensor(x, dtype=torch.float32).unsqueeze(0)  # shape (1,36)
x_tensor = x_tensor.to(device) # Move tensor to the same device as the model

model.eval()
with torch.no_grad():
    pred = model(x_tensor)
    predicted_class = pred.softmax(dim=1)

print(predicted_class)

print("Predicted class:", class_names[int(predicted_class.argmax())])

tensor([[0.0316, 0.9684]], device='cuda:0')
Predicted class: Forehand
