### imports


In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader, random_split

### define transforms

In [2]:
# Preprocessing shared by every image fed to the network.
INPUT_SIZE = (224, 224)
IMAGENET_MEAN = [0.485, 0.456, 0.406]  # standard ImageNet per-channel mean
IMAGENET_STD = [0.229, 0.224, 0.225]   # standard ImageNet per-channel std

# Pipeline order matters: resize the PIL image, convert it to a tensor,
# then normalize the tensor channel-wise.
preprocessing_steps = [
    transforms.Resize(INPUT_SIZE),
    transforms.ToTensor(),
    transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
]
transform = transforms.Compose(preprocessing_steps)

### load dataset with `ImageFolder`, which uses each subfolder name (`backhand`, `forehand`) as the class label

In [3]:
# Root folder holding one subdirectory per class; ImageFolder turns each
# subdirectory name into a label and applies `transform` to every image it loads.
DATASET_ROOT = '/content/drive/MyDrive/dataset'
dataset = datasets.ImageFolder(root=DATASET_ROOT, transform=transform)

### create train, validation, test sets

In [4]:
from torch.utils.data import random_split

# Fixed seed so the train/val/test membership is identical on every run.
# Without it, each kernel restart reshuffles the split and the reported
# accuracies are not comparable between runs.
SPLIT_SEED = 42

# Define lengths (70% / 15% / remainder)
total_size = len(dataset)
train_size = int(0.7 * total_size)
val_size = int(0.15 * total_size)
test_size = total_size - train_size - val_size  # remaining ~15%, absorbs rounding

# Split the dataset using a dedicated generator, so the global RNG that drives
# weight init and batch shuffling is left untouched.
split_generator = torch.Generator().manual_seed(SPLIT_SEED)
train_dataset, val_dataset, test_dataset = random_split(
    dataset,
    [train_size, val_size, test_size],
    generator=split_generator,
)

# NOTE(review): this is a per-image random split. If the images are frames
# extracted from a small number of videos (TODO confirm), near-identical frames
# of the same clip can end up in train, val and test at once, which would
# inflate validation/test accuracy. Consider splitting by source video.

# Create DataLoaders (only the training data is shuffled)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

print("Classes:", dataset.classes)
print(f"Train: {len(train_dataset)}, Val: {len(val_dataset)}, Test: {len(test_dataset)}")


Classes: ['backhand', 'forehand']
Train: 682, Val: 146, Test: 147


### define the model

In [5]:
# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Pretrained ResNet18.
# `pretrained=True` is deprecated since torchvision 0.13 in favour of the
# `weights=` enum API; IMAGENET1K_V1 is the checkpoint that `pretrained=True`
# loads. The fallback keeps this cell working on older torchvision releases.
if hasattr(models, "ResNet18_Weights"):
    model = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)
else:
    model = models.resnet18(pretrained=True)

# Swap the original classification head for one with a single output per
# dataset class (forehand/backhand -> 2), derived from the dataset so the head
# cannot get out of sync with the label set.
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, len(dataset.classes))
model = model.to(device)

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 192MB/s]


### optimizer, loss

In [6]:
# Cross-entropy on the class logits. Adam receives every model parameter, so
# the whole network is updated during training, not just the new head.
LEARNING_RATE = 1e-4
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=LEARNING_RATE)

### training

In [7]:
num_epochs = 5  # Adjust as needed

for epoch in range(num_epochs):
    # ---- Training pass: weights are updated after every batch ----
    model.train()
    loss_sum, n_correct, n_seen = 0.0, 0, 0

    for batch_images, batch_labels in train_loader:
        batch_images = batch_images.to(device)
        batch_labels = batch_labels.to(device)

        optimizer.zero_grad()
        logits = model(batch_images)
        batch_loss = criterion(logits, batch_labels)
        batch_loss.backward()
        optimizer.step()

        # The loss is scaled by the batch size before accumulating so that
        # dividing by the sample count below yields a per-sample average.
        loss_sum += batch_loss.item() * batch_images.size(0)
        n_seen += batch_labels.size(0)
        n_correct += (logits.argmax(dim=1) == batch_labels).sum().item()

    epoch_loss = loss_sum / n_seen
    epoch_acc = n_correct / n_seen
    print(f"Epoch {epoch+1}/{num_epochs} - Loss: {epoch_loss:.4f}, Accuracy: {epoch_acc:.4f}")

    # ---- Validation pass: no gradients, no weight updates ----
    model.eval()
    val_hits, val_seen = 0, 0
    with torch.no_grad():
        for batch_images, batch_labels in val_loader:
            batch_images = batch_images.to(device)
            batch_labels = batch_labels.to(device)
            predicted_idx = model(batch_images).argmax(dim=1)
            val_seen += batch_labels.size(0)
            val_hits += (predicted_idx == batch_labels).sum().item()

    val_acc = val_hits / val_seen
    print(f"Validation Accuracy: {val_acc:.4f}")


Epoch 1/5 - Loss: 0.0457, Accuracy: 0.9824
Validation Accuracy: 1.0000
Epoch 2/5 - Loss: 0.0007, Accuracy: 1.0000
Validation Accuracy: 1.0000
Epoch 3/5 - Loss: 0.0002, Accuracy: 1.0000
Validation Accuracy: 1.0000
Epoch 4/5 - Loss: 0.0002, Accuracy: 1.0000
Validation Accuracy: 1.0000
Epoch 5/5 - Loss: 0.0003, Accuracy: 1.0000
Validation Accuracy: 1.0000


In [None]:
### evaluate on the held-out test set

In [8]:
# Accuracy on the test split, which is not used in the training cell above.
model.eval()
test_correct, test_total = 0, 0
with torch.no_grad():  # evaluation only, so gradient tracking is not needed
    for batch_images, batch_labels in test_loader:
        batch_images = batch_images.to(device)
        batch_labels = batch_labels.to(device)
        predicted_idx = model(batch_images).argmax(dim=1)
        test_total += batch_labels.size(0)
        test_correct += (predicted_idx == batch_labels).sum().item()

test_acc = test_correct / test_total
print(f"Final Test Accuracy: {test_acc:.4f}")

Final Test Accuracy: 1.0000


In [9]:
# Persist only the learned parameters (the state_dict), not the model object;
# loading them later requires re-creating the same architecture first.
trained_weights = model.state_dict()
torch.save(trained_weights, "forehand_backhand_model.pth")

In [10]:
from google.colab import files
# Colab-only helper: hands the saved weights file to the browser as a download.
weights_file = "forehand_backhand_model.pth"
files.download(weights_file)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

### test with a new image

In [23]:
from PIL import Image

# Reuse `transform` from the "define transforms" cell instead of re-declaring
# it here: inference preprocessing must match training exactly, and a second
# copy of the pipeline can silently drift from the original.

# Load your new image
img_path = "/content/forehand_blackshirt.MOV_11.jpg"
# img_path = "/content/fh_test.jpeg"

# The context manager closes the underlying file once the pixels are read;
# convert('RGB') ensures 3 channels regardless of the source image mode.
with Image.open(img_path) as img_file:
    pil_image = img_file.convert('RGB')

# Apply transform and add batch dimension
image = transform(pil_image).unsqueeze(0).to(device)  # [1, C, H, W]

# Make prediction
model.eval()
with torch.no_grad():
    outputs = model(image)
    _, predicted = torch.max(outputs, 1)
    print(outputs)  # raw scores, one per class, in dataset.classes order
    predicted_class = dataset.classes[predicted.item()]  # dataset from ImageFolder

print("Predicted class:", predicted_class)


tensor([[-7.2700,  5.1628]], device='cuda:0')
Predicted class: forehand


### test on a video, classifying every 10th frame

In [25]:
import cv2
from PIL import Image
import torch

videoPath = "/content/fh_bh_testvid.MOV"
FRAME_STRIDE = 10  # classify every 10th frame

vidcap = cv2.VideoCapture(videoPath)
# VideoCapture does not raise on a missing or undecodable file; without this
# check the loop below would end immediately and the cell would print nothing.
if not vidcap.isOpened():
    raise IOError(f"Could not open video: {videoPath}")

frame_count = 0
model.eval()  # loop-invariant: switch to inference mode once, not per frame

try:
    with torch.no_grad():
        while True:
            success, frame = vidcap.read()
            if not success:  # end of stream (or a decode failure)
                break

            if frame_count % FRAME_STRIDE == 0:
                # Convert OpenCV BGR -> RGB
                frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                # Convert NumPy array to PIL Image so the same `transform`
                # used for the dataset images can be applied
                pil_frame = Image.fromarray(frame_rgb)

                # Apply transform and predict
                frame_tensor = transform(pil_frame).unsqueeze(0).to(device)  # [1, C, H, W]
                outputs = model(frame_tensor)
                _, predicted = torch.max(outputs, 1)
                predicted_class = dataset.classes[predicted.item()]

                print(f"Frame {frame_count}: {predicted_class}")

            frame_count += 1
finally:
    # Release the capture handle even if preprocessing or inference raises
    vidcap.release()

Frame 0: forehand
Frame 10: forehand
Frame 20: forehand
Frame 30: forehand
Frame 40: forehand
Frame 50: forehand
Frame 60: forehand
Frame 70: backhand
Frame 80: backhand
Frame 90: backhand
Frame 100: backhand
Frame 110: backhand
Frame 120: backhand
Frame 130: forehand
Frame 140: forehand
Frame 150: forehand
Frame 160: forehand
Frame 170: forehand
Frame 180: forehand
Frame 190: forehand
Frame 200: forehand
Frame 210: backhand
Frame 220: backhand
Frame 230: backhand
Frame 240: backhand
Frame 250: backhand
Frame 260: forehand
Frame 270: forehand
Frame 280: forehand
Frame 290: forehand
Frame 300: forehand
Frame 310: forehand
Frame 320: backhand
Frame 330: backhand
Frame 340: backhand
Frame 350: backhand
Frame 360: backhand
Frame 370: forehand
Frame 380: forehand
Frame 390: forehand
Frame 400: forehand
Frame 410: forehand
Frame 420: forehand
