In [49]:
import torch 
from torch.utils.data import Dataset, DataLoader 
from torchvision import models, transforms 
import json
import cv2 
import numpy as np 





In [50]:
# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")


# Create Torch Dataset

In [54]:
class Keypoints(Dataset):
    """Dataset of images paired with their flattened keypoint coordinates.

    Every entry of the JSON annotation list is read through two keys:
    ``id`` (the stem of the ``.png`` file inside ``img_dir``) and ``kps``
    (keypoint coordinates expressed in pixels of the original image).
    Images are resized to an ``img_size`` x ``img_size`` square and the
    keypoints are rescaled so that they still line up with the resized image.

    Args:
        img_dir: Directory that contains the ``<id>.png`` images.
        data_file: Path to the JSON file holding the list of annotations.
        img_size: Side length (in pixels) of the square the images are
            resized to. Defaults to 224, the value that used to be
            hard-coded in both the resize and the keypoint scaling.
    """

    def __init__(self, img_dir, data_file, img_size=224):
        self.img_dir = img_dir
        self.img_size = img_size
        with open(data_file, "r") as f:
            self.data = json.load(f)

        # NOTE(review): the mean/std below are close to, but not identical to,
        # the ImageNet statistics listed in the torchvision docs
        # (mean 0.485/0.456/0.406, std 0.229/0.224/0.225). They are kept
        # unchanged so existing checkpoints see the same inputs -- confirm
        # whether the difference is intentional.
        self.transforms = transforms.Compose([
            transforms.ToPILImage(),
            transforms.Resize((img_size, img_size)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.49, 0.45, 0.4], std=[0.23, 0.225, 0.224]),
        ])

    def __len__(self):
        """Return the number of annotated samples."""
        return len(self.data)

    @staticmethod
    def _rescale_keypoints(kps, width, height, size):
        """Flatten keypoints and map them from the original image to the resized one.

        Args:
            kps: Keypoint coordinates in original-image pixels; after
                flattening, even positions are treated as x and odd
                positions as y.
            width: Width of the original image in pixels.
            height: Height of the original image in pixels.
            size: Side length of the square the image was resized to.

        Returns:
            A new 1-D ``float32`` array ``[x0, y0, x1, y1, ...]`` in
            resized-image pixels. The input is not modified.
        """
        scaled = np.array(kps).flatten().astype(np.float32)
        scaled[::2] *= size / width    # x coordinates follow the width ratio
        scaled[1::2] *= size / height  # y coordinates follow the height ratio
        return scaled

    def __getitem__(self, idx):
        """Load one sample.

        Returns:
            A tuple ``(img, kps)``: the transformed image produced by
            ``self.transforms`` and the rescaled, flattened ``float32``
            keypoint array.

        Raises:
            ValueError: If OpenCV cannot read the image file.
        """
        item = self.data[idx]
        img_path = f"{self.img_dir}/{item['id']}.png"
        img = cv2.imread(img_path)

        # cv2.imread signals a missing/unreadable file by returning None
        # instead of raising, so fail loudly here.
        if img is None:
            raise ValueError(f"Image at {img_path} could not be loaded.")

        # Original size is needed to rescale the keypoints after the resize.
        h, w = img.shape[:2]

        # OpenCV loads images as BGR; convert to RGB before the transforms.
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = self.transforms(img)

        kps = self._rescale_keypoints(item['kps'], w, h, self.img_size)

        return img, kps




In [55]:
# Both splits read images from the same directory; only the annotation file differs.
train_dataset = Keypoints("tennis_court_det_dataset/data/images", "tennis_court_det_dataset/data/data_train.json")
val_dataset = Keypoints("tennis_court_det_dataset/data/images", "tennis_court_det_dataset/data/data_val.json")

# Shuffle only the training batches. Validation order does not need to be
# randomised, and a fixed order keeps per-batch validation output reproducible.
train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=8, shuffle=False)

# Create Model

In [56]:
# Start from a ResNet-50 with torchvision's pretrained weights.
# NOTE(review): newer torchvision releases deprecate `pretrained=` in favour of
# `weights=` -- confirm the installed version before switching.
model = models.resnet50(pretrained=True)
# Replace the final fully connected layer with a regression head:
# 14 court keypoints, each with an (x, y) coordinate.
model.fc = torch.nn.Linear(in_features=model.fc.in_features, out_features=14 * 2)



In [57]:
# Move the network's parameters and buffers onto the selected device.
model = model.to(device=device)

# Train Model


In [58]:
# Mean-squared error between the predicted and the target keypoint coordinates.
criterion = torch.nn.MSELoss()
# Adam over every model parameter (the whole network is fine-tuned).
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-4)

In [59]:
epochs = 20
for epoch in range(epochs):
    # Explicitly select training mode so the loop behaves the same even if an
    # earlier cell switched the model to eval mode.
    model.train()
    for i, (img, kps) in enumerate(train_loader):
        # Inputs and targets must live on the same device as the model.
        img = img.to(device)
        kps = kps.to(device)

        # Standard optimisation step: clear old gradients, forward pass,
        # compute the loss, back-propagate, then update the weights.
        optimizer.zero_grad()
        outputs = model(img)
        loss = criterion(outputs, kps)
        loss.backward()
        optimizer.step()

        # Report the loss of the current batch every 10 iterations.
        if i % 10 == 0:
            print(f"Epoch {epoch}, iter {i}, loss {loss.item()}")

Epcoch 0, iter 0, loss 14541.5947265625
Epcoch 0, iter 10, loss 14266.8154296875
Epcoch 0, iter 20, loss 14293.3134765625
Epcoch 0, iter 30, loss 14820.3701171875
Epcoch 0, iter 40, loss 13329.806640625
Epcoch 0, iter 50, loss 12874.27734375
Epcoch 0, iter 60, loss 12379.6474609375
Epcoch 0, iter 70, loss 11667.591796875
Epcoch 0, iter 80, loss 11633.2451171875
Epcoch 0, iter 90, loss 11044.826171875
Epcoch 0, iter 100, loss 11315.2216796875
Epcoch 0, iter 110, loss 11036.2470703125
Epcoch 0, iter 120, loss 10634.67578125
Epcoch 0, iter 130, loss 10159.4267578125
Epcoch 0, iter 140, loss 9671.05859375
Epcoch 0, iter 150, loss 9237.056640625
Epcoch 0, iter 160, loss 8965.0068359375
Epcoch 0, iter 170, loss 8608.181640625
Epcoch 0, iter 180, loss 8447.568359375
Epcoch 0, iter 190, loss 7912.4287109375
Epcoch 0, iter 200, loss 7990.23876953125
Epcoch 0, iter 210, loss 8317.884765625
Epcoch 0, iter 220, loss 7191.78515625
Epcoch 0, iter 230, loss 6998.5693359375
Epcoch 0, iter 240, loss 67

In [61]:
# Persist only the learned weights (the state dict), not the whole model object.
torch.save(obj=model.state_dict(), f="keypoints_model.pth")