In [5]:
import torch
import torchvision.models as models
import torchvision
from PIL import Image

In [2]:

class Model(torch.nn.Module):
    """VGG16-based regressor that predicts keypoint attributes for an image.

    The backbone is a VGG16 created without pretrained weights. Its final
    classifier layer is replaced by a linear layer that emits
    ``num_keypoints * 3`` values per input image.

    Each keypoint has 3 attributes:
      1. x coordinate
      2. y coordinate
      3. a "state" (visible or not). A state of 0 means the joint either does
         not exist or is outside of the image's bounds, 1 denotes a joint that
         is inside of the image but cannot be seen because the part of the
         object it belongs to is not visible in the image, and 2 means the
         joint was present and visible.
         (TODO: this should be one-hot encoded or use embeddings instead of a
         single number)

    Args:
        num_keypoints: Number of keypoints to detect. Defaults to 16, which
            yields 48 output features.

    Raises:
        ValueError: If ``num_keypoints`` is smaller than 1.
    """

    # Values predicted for every keypoint: x, y and state.
    ATTRIBUTES_PER_KEYPOINT = 3

    def __init__(self, num_keypoints=16):
        # Validate before building the backbone so a bad argument fails fast,
        # without triggering the torch.hub fetch below.
        if num_keypoints < 1:
            raise ValueError(
                f"num_keypoints must be a positive integer, got {num_keypoints!r}"
            )

        super().__init__()

        self.num_keypoints = num_keypoints

        # Create a VGG16 network (architecture only, no pretrained weights)
        self.vgg16 = torch.hub.load('pytorch/vision:v0.6.0', 'vgg16', pretrained=False)

        num_out_features = num_keypoints * self.ATTRIBUTES_PER_KEYPOINT

        # Replace the last layer of the VGG16 network with a linear layer.
        # The input width is read from the layer being replaced instead of
        # hard-coding 4096, so the new head always matches the backbone.
        in_features = self.vgg16.classifier[-1].in_features
        self.vgg16.classifier[-1] = torch.nn.Linear(
            in_features=in_features,
            out_features=num_out_features,
            bias=True,
        )

    def forward(self, x):
        """Run the backbone and return the raw keypoint predictions.

        Args:
            x: Batch of images. The VGG16 backbone's first convolution takes
                3 input channels, so ``x`` is expected to be
                ``(batch, 3, height, width)``.

        Returns:
            Tensor with ``num_keypoints * 3`` values per image in the batch.
        """
        y_pred = self.vgg16(x)
        return y_pred

In [3]:
# Instantiate the keypoint network. Building it calls torch.hub.load, which
# fetches the pytorch/vision v0.6.0 repository into the local torch hub cache
# when it is not already there (see the download log below).
model = Model()

Downloading: "https://github.com/pytorch/vision/zipball/v0.6.0" to /Users/tleyden/.cache/torch/hub/v0.6.0.zip


In [28]:
# Open the rendered camera frame (absolute, machine-specific path).
img = Image.open(
    "/Users/tleyden/Library/Application Support/DefaultCompany/TennisCourt/solo_3/sequence.0/step0.camera.png"
)

# Resizing to 224x224 is currently disabled; the frame keeps its native size.
#img = torchvision.transforms.functional.resize(img, (224, 224))

# Turn the PIL image into a tensor using the ToTensor transform.
img_tensor = torchvision.transforms.ToTensor()(img)

# Report the dimensions of the resulting tensor.
print(img_tensor.size())


torch.Size([4, 769, 1532])


In [20]:
# Display the module tree to inspect the layers of the wrapped VGG16.
model

Model(
  (vgg16): VGG(
    (features): Sequential(
      (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): ReLU(inplace=True)
      (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (3): ReLU(inplace=True)
      (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
      (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (6): ReLU(inplace=True)
      (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (8): ReLU(inplace=True)
      (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
      (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (11): ReLU(inplace=True)
      (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (13): ReLU(inplace=True)
      (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (15): ReLU(inplace=True)
      (16): Ma

In [29]:
# Keep only the first three channels so the tensor matches the 3-channel
# input of the network's first convolution (the loaded frame had 4 channels,
# presumably RGBA).
img_tensor = img_tensor[:3]
img_tensor.size()

torch.Size([3, 769, 1532])

In [30]:
# Prepend a batch axis of size 1 so the single image can be fed to the model.
batch = torch.unsqueeze(img_tensor, dim=0)
batch.size()

torch.Size([1, 3, 769, 1532])

In [31]:
# Smoke-test the forward pass on the single-image batch; the result is one
# row of 48 raw predictions (16 keypoints x 3 attributes).
# NOTE(review): autograd is active here (the output carries a grad_fn) and no
# visible cell calls model.eval(); for real inference consider model.eval()
# together with torch.no_grad() — confirm against the intended usage.
model(batch)

tensor([[-0.0102,  0.0059,  0.0053,  0.0172,  0.0196,  0.0097, -0.0121, -0.0184,
         -0.0108,  0.0036,  0.0139, -0.0121,  0.0245, -0.0017,  0.0038,  0.0042,
          0.0074,  0.0231,  0.0197,  0.0035, -0.0039,  0.0002,  0.0054, -0.0017,
          0.0003, -0.0544, -0.0077,  0.0041, -0.0054, -0.0004, -0.0215, -0.0020,
         -0.0051,  0.0042,  0.0226, -0.0271,  0.0246, -0.0182,  0.0157, -0.0147,
          0.0147,  0.0079,  0.0239,  0.0147, -0.0066, -0.0221, -0.0007, -0.0219]],
       grad_fn=<AddmmBackward0>)

In [21]:
# Scratch check: 16 keypoints x 3 attributes each = size of the model output.
16 * 3

48