# Download the tennis court dataset

In [1]:
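# Dataset download step: the tennis court dataset is stored on the local machine
# (later cells expect it under tennis_court_det_dataset/).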

# Import libraries

In [12]:
import numpy as np
import cv2
import json
import torch
from torch.utils.data import Dataset , DataLoader
from torchvision  import transforms,models
from torchinfo import summary

In [5]:
# Prefer the GPU when CUDA is usable; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
device

device(type='cuda')

# Apply transforms to the images and scale the keypoints to the resized image

In [19]:
class KeyPointsDatasets(Dataset):
    """Dataset of court images paired with their flattened keypoint coordinates.

    Each annotation entry loaded from ``data_file`` must provide an ``'id'``
    (the image file stem inside ``img_dir``) and ``'kps'`` (a sequence of
    (x, y) pairs expressed in the original image's pixel coordinates).

    Args:
        img_dir: Directory containing the ``<id>.png`` images.
        data_file: Path to the JSON annotation file.
        img_size: Side length, in pixels, of the square image produced by the
            transform pipeline. Keypoints are rescaled into the same frame.
            Defaults to 224.
    """

    def __init__(self, img_dir, data_file, img_size=224):
        self.img_dir = img_dir
        self.img_size = img_size
        with open(data_file, 'r') as f:
            self.data_file = json.load(f)
        # Resize to the model input size and normalize with the ImageNet
        # channel statistics used by the pretrained torchvision backbones.
        self.transforms = transforms.Compose(
            [
                transforms.ToPILImage(),
                transforms.Resize((img_size, img_size)),
                transforms.ToTensor(),
                transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
            ]
        )

    def __len__(self):
        """Return the number of annotated samples."""
        return len(self.data_file)

    @staticmethod
    def _scale_keypoints(kps, width, height, img_size=224):
        """Flatten (x, y) pairs and rescale them to an ``img_size`` square image.

        Args:
            kps: Sequence of (x, y) pairs in original-image pixel coordinates.
            width: Original image width in pixels.
            height: Original image height in pixels.
            img_size: Side length of the resized image.

        Returns:
            1-D ``np.float32`` array laid out as ``[x0, y0, x1, y1, ...]``.
        """
        # flatten() returns a copy, so the in-place scaling never touches the
        # annotation data held in memory.
        flat = np.asarray(kps, dtype=np.float32).flatten()
        flat[::2] *= img_size / width    # x coordinates
        flat[1::2] *= img_size / height  # y coordinates
        return flat

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(image_tensor, keypoints)``.

        Raises:
            FileNotFoundError: If the image file is missing or cannot be decoded.
        """
        item = self.data_file[idx]
        img_path = f"{self.img_dir}/{item['id']}.png"
        img = cv2.imread(img_path)
        # cv2.imread signals failure by returning None instead of raising.
        if img is None:
            raise FileNotFoundError(f"Could not read image: {img_path}")
        height, width = img.shape[:2]
        # OpenCV loads BGR; the transform pipeline expects RGB.
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = self.transforms(img)
        kps = self._scale_keypoints(item['kps'], width, height, self.img_size)

        return img, kps


# Create Datasets and DataLoaders

In [20]:
# Both splits read images from the same directory; only the annotation file differs.
train_dataset = KeyPointsDatasets("tennis_court_det_dataset/data/images","tennis_court_det_dataset/data/data_train.json")
val_dataset = KeyPointsDatasets("tennis_court_det_dataset/data/images","tennis_court_det_dataset/data/data_val.json")

train_dataloader = DataLoader(train_dataset, batch_size=8, shuffle=True)
# The validation loader must wrap val_dataset (it previously wrapped train_dataset,
# so the held-out split was never used). Shuffling is unnecessary for evaluation.
val_dataloader = DataLoader(val_dataset, batch_size=8, shuffle=False)


# Getting a pretrained model

In [14]:
# Start from an ImageNet-pretrained ResNet-50 backbone.
model = models.resnet50(pretrained=True)
# Show a per-layer overview for a dummy batch of 32 RGB images at 224x224.
summary(
    model,
    input_size=(32, 3, 224, 224),  # make sure this is "input_size", not "input_shape"
    row_settings=["var_names"],
    col_width=20,
    col_names=["input_size", "output_size", "num_params", "trainable"],
)

Layer (type (var_name))                  Input Shape          Output Shape         Param #              Trainable
ResNet (ResNet)                          [32, 3, 224, 224]    [32, 1000]           --                   True
├─Conv2d (conv1)                         [32, 3, 224, 224]    [32, 64, 112, 112]   9,408                True
├─BatchNorm2d (bn1)                      [32, 64, 112, 112]   [32, 64, 112, 112]   128                  True
├─ReLU (relu)                            [32, 64, 112, 112]   [32, 64, 112, 112]   --                   --
├─MaxPool2d (maxpool)                    [32, 64, 112, 112]   [32, 64, 56, 56]     --                   --
├─Sequential (layer1)                    [32, 64, 56, 56]     [32, 256, 56, 56]    --                   True
│    └─Bottleneck (0)                    [32, 64, 56, 56]     [32, 256, 56, 56]    --                   True
│    │    └─Conv2d (conv1)               [32, 64, 56, 56]     [32, 64, 56, 56]     4,096                True
│    │    └─BatchN

In [15]:
# Show the model's current final fully connected layer (its in/out feature sizes).
print(model.fc)

Linear(in_features=2048, out_features=1000, bias=True)


# Transfer learning (replace the last layer to match our outputs)
The tennis court has 14 keypoints, and each keypoint has an (x, y) coordinate, so the model must predict 14 x 2 = 28 values.

In [21]:
# Swap the classification head for a regression head: 14 keypoints x (x, y) = 28 outputs.
model.fc = torch.nn.Linear(in_features=model.fc.in_features, out_features=14 * 2)
# Move all parameters, including the new head, onto the selected device.
model = model.to(device)

# Train the Model

In [22]:
# Define loss and optimizer
loss_fn = torch.nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(),lr = 1e-4)

In [26]:
epochs = 20
# Seed PyTorch's CPU and CUDA random number generators
torch.manual_seed(42)
torch.cuda.manual_seed(42)

num_train_samples = len(train_dataloader.dataset)
for epoch in range(epochs):
    print(f"Epoch: {epoch}\n-------")
    ### Training
    # Switch to training mode once per epoch rather than on every batch.
    model.train()
    train_loss = 0.0
    # Running count of samples from completed batches; unlike
    # `batch * len(imgs)`, this stays correct when a batch is smaller than batch_size.
    samples_seen = 0
    for batch, (imgs, kps) in enumerate(train_dataloader):
        imgs = imgs.to(device)
        kps = kps.to(device)

        # 1. Forward pass
        y_pred = model(imgs)

        # 2. Calculate loss (per batch)
        loss = loss_fn(y_pred, kps)
        train_loss += loss.item()  # accumulate the per-batch loss over the epoch

        # 3. Clear gradients left over from the previous step
        optimizer.zero_grad()

        # 4. Backpropagate
        loss.backward()

        # 5. Update the weights
        optimizer.step()

        # Report progress every 10 batches (count excludes the current batch)
        if batch % 10 == 0:
            print(f"Looked at {samples_seen}/{num_train_samples} samples")
        samples_seen += len(imgs)

    # Average loss per batch for this epoch
    train_loss /= len(train_dataloader)

    ## Print out what's happening
    print(f"\nTrain loss: {train_loss:.5f}")

Epoch: 0
-------
Looked at 0/6630 samples
Looked at 80/6630 samples
Looked at 160/6630 samples
Looked at 240/6630 samples
Looked at 320/6630 samples
Looked at 400/6630 samples
Looked at 480/6630 samples
Looked at 560/6630 samples
Looked at 640/6630 samples
Looked at 720/6630 samples
Looked at 800/6630 samples
Looked at 880/6630 samples
Looked at 960/6630 samples
Looked at 1040/6630 samples
Looked at 1120/6630 samples
Looked at 1200/6630 samples
Looked at 1280/6630 samples
Looked at 1360/6630 samples
Looked at 1440/6630 samples
Looked at 1520/6630 samples
Looked at 1600/6630 samples
Looked at 1680/6630 samples
Looked at 1760/6630 samples
Looked at 1840/6630 samples
Looked at 1920/6630 samples
Looked at 2000/6630 samples
Looked at 2080/6630 samples
Looked at 2160/6630 samples
Looked at 2240/6630 samples
Looked at 2320/6630 samples
Looked at 2400/6630 samples
Looked at 2480/6630 samples
Looked at 2560/6630 samples
Looked at 2640/6630 samples
Looked at 2720/6630 samples
Looked at 2800/6630

In [None]:
# Persist only the learned weights (state dict), not the full pickled model object.
torch.save(obj=model.state_dict(), f="kps_model.pth")

: 