In [2]:
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import models, transforms
import json
import cv2
import numpy as np
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score


In [3]:

# Prefer the GPU when CUDA is usable; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")


In [4]:

# Dataset Class
class KeypointsDataset(Dataset):
    """Image/keypoint pairs read from an image directory and a JSON annotation file.

    Each JSON entry must provide an ``id`` (the image is ``<img_dir>/<id>.png``)
    and ``kps`` (keypoint coordinates in the pixel frame of the original image).
    Images are resized to ``IMG_SIZE x IMG_SIZE`` and normalised; keypoints are
    rescaled into that same resized frame so targets line up with the input.

    Args:
        img_dir: Directory containing the ``.png`` images.
        data_file: Path to the JSON file holding the list of annotation entries.
    """

    # Side length (pixels) of the square network input. The resize transform and
    # the keypoint rescaling must agree on it, so it is defined once here.
    IMG_SIZE = 224

    def __init__(self, img_dir, data_file):
        self.img_dir = img_dir
        with open(data_file, "r") as f:
            self.data = json.load(f)

        self.transforms = transforms.Compose([
            transforms.ToPILImage(),
            transforms.Resize((self.IMG_SIZE, self.IMG_SIZE)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        ])

    def __len__(self):
        """Return the number of annotated samples."""
        return len(self.data)

    @staticmethod
    def _scale_keypoints(kps, width, height, size=IMG_SIZE):
        """Flatten keypoints to float32 ``[x0, y0, x1, y1, ...]`` in the resized frame."""
        scaled = np.array(kps).flatten().astype(np.float32)
        scaled[::2] *= size / width    # x coords
        scaled[1::2] *= size / height  # y coords
        return scaled

    def __getitem__(self, idx):
        """Return ``(image_tensor, keypoints)`` for sample ``idx``.

        Raises:
            FileNotFoundError: If the image file is missing or cannot be decoded.
        """
        item = self.data[idx]
        img_path = f"{self.img_dir}/{item['id']}.png"
        img = cv2.imread(img_path)
        # cv2.imread signals failure by returning None instead of raising; fail
        # loudly with the path rather than with an AttributeError on `.shape`.
        if img is None:
            raise FileNotFoundError(f"Could not read image: {img_path}")

        # Original size is needed to map keypoints into the resized frame.
        h, w = img.shape[:2]
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = self.transforms(img)
        kps = self._scale_keypoints(item['kps'], w, h)
        return img, kps


In [5]:

# Build the train/validation datasets (same image folder, different annotation files)
train_dataset = KeypointsDataset(
    img_dir="/kaggle/input/court-detection-dataset/data/images",
    data_file="/kaggle/input/court-detection-dataset/data/data_train.json",
)
val_dataset = KeypointsDataset(
    img_dir="/kaggle/input/court-detection-dataset/data/images",
    data_file="/kaggle/input/court-detection-dataset/data/data_val.json",
)

# Only the training batches are shuffled; validation keeps the file order.
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=8)
val_loader = DataLoader(val_dataset, shuffle=False, batch_size=8)


In [6]:

# Model: pretrained ResNet-50 backbone with a fresh regression head
model = models.resnet50(pretrained=True)
# The head emits 14 * 2 values -- presumably (x, y) for 14 keypoints; confirm against the annotations.
model.fc = torch.nn.Linear(
    in_features=model.fc.in_features,
    out_features=14 * 2,
)
model = model.to(device)

# Mean-squared error between predicted and target coordinates, optimised with Adam.
criterion = torch.nn.MSELoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-4)


Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:00<00:00, 197MB/s]


In [7]:

# Train for a fixed number of epochs, logging the batch loss every 10th iteration
epochs = 20
for epoch in range(epochs):
    # Put the model in training mode at the start of every epoch.
    model.train()
    for i, (imgs, kps) in enumerate(train_loader):
        imgs = imgs.to(device)
        kps = kps.to(device)

        # Standard step: clear old gradients, forward, backward, update.
        optimizer.zero_grad()
        outputs = model(imgs)
        loss = criterion(outputs, kps)
        loss.backward()
        optimizer.step()

        if i % 10:
            continue
        print(f"Epoch {epoch}, Iter {i}, Loss: {loss.item():.4f}")



Epoch 0, Iter 0, Loss: 14888.6953
Epoch 0, Iter 10, Loss: 14665.0918
Epoch 0, Iter 20, Loss: 14701.4639
Epoch 0, Iter 30, Loss: 13670.7793
Epoch 0, Iter 40, Loss: 13289.1367
Epoch 0, Iter 50, Loss: 12903.1338
Epoch 0, Iter 60, Loss: 12344.7793
Epoch 0, Iter 70, Loss: 12420.1699
Epoch 0, Iter 80, Loss: 12231.5264
Epoch 0, Iter 90, Loss: 11073.3916
Epoch 0, Iter 100, Loss: 13511.1592
Epoch 0, Iter 110, Loss: 11431.8154
Epoch 0, Iter 120, Loss: 11261.2930
Epoch 0, Iter 130, Loss: 10245.1152
Epoch 0, Iter 140, Loss: 9645.1738
Epoch 0, Iter 150, Loss: 9415.5186
Epoch 0, Iter 160, Loss: 9044.5107
Epoch 0, Iter 170, Loss: 8723.1445
Epoch 0, Iter 180, Loss: 7968.4453
Epoch 0, Iter 190, Loss: 7483.4541
Epoch 0, Iter 200, Loss: 7612.3760
Epoch 0, Iter 210, Loss: 7751.8047
Epoch 0, Iter 220, Loss: 7177.9458
Epoch 0, Iter 230, Loss: 6926.0708
Epoch 0, Iter 240, Loss: 6873.0181
Epoch 0, Iter 250, Loss: 6526.0615
Epoch 0, Iter 260, Loss: 6265.6558
Epoch 0, Iter 270, Loss: 6350.7915
Epoch 0, Iter 280

In [8]:
# Persist only the learned parameters (the state dict) to keypoints_model.pkl
torch.save(obj=model.state_dict(), f="keypoints_model.pkl")

In [9]:
# Evaluation metrics
def evaluate(model, loader, threshold=5):
    """Score keypoint predictions as hit/miss and print and return summary metrics.

    A predicted keypoint is a hit when its Euclidean distance to the matching
    ground-truth keypoint is strictly below ``threshold``. Every ground-truth
    keypoint is labelled positive, so accuracy and recall both equal the hit
    rate, and precision is 1.0 as soon as at least one keypoint is hit.

    Args:
        model: Network mapping an image batch to flattened ``(x, y)`` keypoints.
        loader: Iterable of ``(imgs, kps)`` batches; ``kps`` must be a CPU tensor
            with the same flattened layout as the model output.
        threshold: Maximum distance for a prediction to count as a hit, in the
            same units as the keypoint coordinates.

    Returns:
        dict with keys ``"accuracy"``, ``"precision"``, ``"recall"`` and ``"f1"``.

    Raises:
        ValueError: If ``loader`` yields no batches.
    """
    model.eval()

    # Run on whatever device the model lives on instead of relying on a
    # notebook-level global; a parameter-free model falls back to the CPU.
    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else torch.device("cpu")

    hit_batches = []
    with torch.no_grad():
        for imgs, kps in loader:
            outputs = model(imgs.to(run_device)).cpu().numpy()
            kps = kps.numpy()

            # (batch, n_keypoints, 2) so one norm call yields every distance.
            pred_pts = outputs.reshape(len(outputs), -1, 2)
            gt_pts = kps.reshape(len(kps), -1, 2)
            dists = np.linalg.norm(pred_pts - gt_pts, axis=-1)
            hit_batches.append((dists < threshold).ravel())

    if not hit_batches:
        raise ValueError("loader produced no batches to evaluate")

    preds = np.concatenate(hit_batches)
    gts = np.ones_like(preds)  # All ground truths are positive in this setup

    acc = accuracy_score(gts, preds)
    prec = precision_score(gts, preds)
    rec = recall_score(gts, preds)
    f1 = f1_score(gts, preds)

    print(f"Accuracy: {acc:.4f}")
    print(f"Precision: {prec:.4f}")
    print(f"Recall: {rec:.4f}")
    print(f"F1 Score: {f1:.4f}")

    return {"accuracy": acc, "precision": prec, "recall": rec, "f1": f1}


In [10]:
# Score the trained model on the validation split (metrics are printed by evaluate);
# the trailing semicolon keeps the cell from echoing any return value.
evaluate(model=model, loader=val_loader);

Accuracy: 0.9852
Precision: 1.0000
Recall: 0.9852
F1 Score: 0.9925
