In [3]:
import torch
import torch.nn as nn
import torchvision.models as models
from torchvision import transforms
from PIL import Image

In [4]:
# Build a ResNet-18 skeleton without requesting any pretrained weights;
# the trained parameters are supplied separately from a checkpoint file.
model = models.resnet18(weights=None)

# Swap the final fully connected layer for a two-output head
# (2 classes: forehand/backhand), keeping the input width of the layer it replaces.
num_features = model.fc.in_features
model.fc = nn.Linear(in_features=num_features, out_features=2)

In [5]:
# Read the saved parameters onto the CPU, whatever device they were saved from.
# weights_only=True restricts unpickling to tensors and plain containers, so a
# tampered checkpoint file cannot execute arbitrary code while being loaded
# (requires PyTorch >= 1.13).
state_dict = torch.load(
    "forehand_backhand_model.pth",
    map_location=torch.device('cpu'),
    weights_only=True,
)

# Copy the parameters into the network; with the default strict matching this
# raises if the checkpoint keys do not line up with the model's layers.
model.load_state_dict(state_dict)
model.eval()  # set to inference mode

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [6]:
# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Per-channel normalisation statistics (the standard ImageNet values used
# with torchvision's ResNet models).
channel_mean = [0.485, 0.456, 0.406]
channel_std = [0.229, 0.224, 0.225]

# Preprocessing pipeline: resize to 224x224, convert the PIL image to a
# float tensor, then normalise each channel.
preprocessing_steps = [
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=channel_mean, std=channel_std),
]
transform = transforms.Compose(preprocessing_steps)

In [None]:
import cv2
from PIL import Image
import torch

# Play a video and overlay the classifier's stroke prediction on it.
# Relies on `model`, `device` and `transform` defined in earlier cells.

videoPath = "videos/fh_bh_testvid.MOV"

# Index i of the model output is reported as classes[i].
# NOTE(review): this order must match the label order used at training time — confirm.
classes = ['backhand', 'forehand']

PREDICT_EVERY_N_FRAMES = 10   # classify only every 10th frame
FRAME_DELAY_MS = 30           # how long waitKey blocks per displayed frame
ESC_KEY = 27                  # key code that stops playback
WINDOW_TITLE = 'Table Tennis Stroke Classification'

# The input tensors are sent to `device`, so the model must live there too;
# otherwise the forward pass fails with a device-mismatch error whenever
# CUDA is available.
model.to(device)
model.eval()  # inference mode; set once instead of on every sampled frame

vidcap = cv2.VideoCapture(videoPath)
if not vidcap.isOpened():
    # Fail loudly instead of silently showing nothing.
    raise OSError(f"Could not open video file: {videoPath}")

frame_count = 0
text = None  # most recent prediction label; redrawn on every frame to avoid flicker

try:
    while True:
        success, frame = vidcap.read()
        if not success:
            break  # end of stream (or read error)

        if frame_count % PREDICT_EVERY_N_FRAMES == 0:
            # Convert OpenCV BGR -> RGB for model
            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            # Convert NumPy array to PIL Image
            image = Image.fromarray(frame_rgb)

            # Apply transform and add a batch dimension: [1, C, H, W]
            image_tensor = transform(image).unsqueeze(0).to(device)

            with torch.no_grad():
                outputs = model(image_tensor)
                probabilities = torch.nn.functional.softmax(outputs, dim=1)
                confidence, predicted = torch.max(probabilities, 1)

            predicted_class = classes[int(predicted.item())]
            confidence_score = confidence.item()
            text = f"{predicted_class}: {confidence_score:.2f}"

        # Draw the latest prediction on the original BGR frame. Drawing on every
        # frame (not only the sampled ones) keeps the label steadily visible.
        if text is not None:
            cv2.putText(frame, text, (30, 200), cv2.FONT_HERSHEY_SIMPLEX, 5, (0, 255, 0), 5)
        cv2.putText(frame, f"Frame: {frame_count}", (30, 300), cv2.FONT_HERSHEY_SIMPLEX, 5, (255, 255, 255), 5)

        # Display frame
        cv2.imshow(WINDOW_TITLE, frame)

        # Press ESC to exit
        key = cv2.waitKey(FRAME_DELAY_MS) & 0xFF
        if key == ESC_KEY:
            break

        frame_count += 1
finally:
    # Always free the capture handle and close the window, even if
    # inference or display raised part-way through the video.
    vidcap.release()
    cv2.destroyAllWindows()