In [None]:
import torch
import torchvision.transforms as transforms
import cv2
from torchvision import models
import numpy as np

class CourtLineDetector:
    def __init__(self, model_path):
        self.model = models.resnet50(pretrained=True)
        self.model.fc = torch.nn.Linear(self.model.fc.in_features, 14*2) 
        self.model.load_state_dict(torch.load(model_path, map_location='cpu'))

    def transform_img(self, img):

        img = transforms.Compose([
            transforms.ToPILImage(),
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        ])(img)

        return img

    def predict(self, image):

    
        image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        image_tensor = self.transform_img(image_rgb).unsqueeze(0)
        with torch.no_grad():
            outputs = self.model(image_tensor)
        keypoints = outputs.squeeze().cpu().numpy()
        original_h, original_w = image.shape[:2]
        keypoints[::2] *= original_w / 224.0
        keypoints[1::2] *= original_h / 224.0

        return keypoints

    def draw_keypoints(self, image, keypoints):
        for i in range(0, len(keypoints), 2):
            x = int(keypoints[i])
            y = int(keypoints[i+1])
            cv2.putText(image, str(i//2), (x, y-10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2)
            cv2.circle(image, (x, y), 5, (0, 0, 255), -1)
        return image
    
    def draw_keypoints_on_video(self, video_frames, keypoints):
        output_video_frames = []
        for frame in video_frames:
            frame = self.draw_keypoints(frame, keypoints)
            output_video_frames.append(frame)
        return output_video_frames

In [15]:
import torch
import torchvision.transforms as transforms
import cv2
from torchvision import models
import numpy as np

In [16]:
model = models.resnet152(pretrained=True)
model.fc = torch.nn.Linear(model.fc.in_features, 28)
model.load_state_dict(torch.load('../models/keypoints_court.pth', map_location='cpu'))



<All keys matched successfully>

In [17]:
image = cv2.imread('../input/image.png')
image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

In [22]:
img = transforms.Compose([
            transforms.ToPILImage(),
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        ])(image_rgb)
img

tensor([[[-1.2617, -1.2617, -1.2274,  ..., -1.1075, -1.1247, -1.1760],
         [-1.0390, -1.0390, -0.9877,  ..., -0.8335, -0.8507, -0.9363],
         [-1.0390, -0.9877, -0.9020,  ..., -0.8507, -0.8507, -0.9363],
         ...,
         [-0.1828, -0.1828, -0.1828,  ..., -0.2513, -0.2513, -0.3541],
         [-0.1828, -0.1828, -0.1828,  ..., -0.2513, -0.2513, -0.3541],
         [-0.1828, -0.1999, -0.1828,  ..., -0.2856, -0.2856, -0.3883]],

        [[-1.7381, -1.7381, -1.7556,  ..., -1.6681, -1.6856, -1.7206],
         [-1.8431, -1.8431, -1.8431,  ..., -1.7731, -1.7906, -1.8081],
         [-1.8431, -1.7556, -1.6155,  ..., -1.7906, -1.7906, -1.8081],
         ...,
         [ 0.6429,  0.6429,  0.6429,  ...,  0.5378,  0.5378,  0.3978],
         [ 0.6429,  0.6429,  0.6429,  ...,  0.5378,  0.5378,  0.3978],
         [ 0.6429,  0.6254,  0.6429,  ...,  0.5378,  0.5028,  0.3627]],

        [[-1.4733, -1.4559, -1.4733,  ..., -1.2467, -1.2990, -1.3164],
         [-1.5604, -1.5604, -1.5953,  ..., -1

In [23]:
img = img.unsqueeze(0)
img

tensor([[[[-1.2617, -1.2617, -1.2274,  ..., -1.1075, -1.1247, -1.1760],
          [-1.0390, -1.0390, -0.9877,  ..., -0.8335, -0.8507, -0.9363],
          [-1.0390, -0.9877, -0.9020,  ..., -0.8507, -0.8507, -0.9363],
          ...,
          [-0.1828, -0.1828, -0.1828,  ..., -0.2513, -0.2513, -0.3541],
          [-0.1828, -0.1828, -0.1828,  ..., -0.2513, -0.2513, -0.3541],
          [-0.1828, -0.1999, -0.1828,  ..., -0.2856, -0.2856, -0.3883]],

         [[-1.7381, -1.7381, -1.7556,  ..., -1.6681, -1.6856, -1.7206],
          [-1.8431, -1.8431, -1.8431,  ..., -1.7731, -1.7906, -1.8081],
          [-1.8431, -1.7556, -1.6155,  ..., -1.7906, -1.7906, -1.8081],
          ...,
          [ 0.6429,  0.6429,  0.6429,  ...,  0.5378,  0.5378,  0.3978],
          [ 0.6429,  0.6429,  0.6429,  ...,  0.5378,  0.5378,  0.3978],
          [ 0.6429,  0.6254,  0.6429,  ...,  0.5378,  0.5028,  0.3627]],

         [[-1.4733, -1.4559, -1.4733,  ..., -1.2467, -1.2990, -1.3164],
          [-1.5604, -1.5604, -

In [26]:
with torch.no_grad():
    outputs = model(img)
keypoints = outputs.squeeze().cpu().numpy()
original_h, original_w = image.shape[:2]
keypoints[::2] *= original_w / 224.0
keypoints[1::2] *= original_h / 224.0

In [27]:
keypoints

array([ 757.4582 ,  347.92404, 1732.629  ,  348.60803,  397.66937,
        988.1057 , 2098.9026 ,  987.0559 ,  880.38904,  348.12756,
        610.7749 ,  988.03864, 1610.8871 ,  348.57687, 1885.55   ,
        987.0967 ,  841.40985,  442.05725, 1650.9088 ,  442.4646 ,
        703.9565 ,  766.8081 , 1788.7946 ,  766.71173, 1246.5435 ,
        442.07144, 1245.2081 ,  766.70776], dtype=float32)