In [5]:
import torch
import torchvision.models as models
import torchvision
from PIL import Image

In [2]:

class Model(torch.nn.Module):

    def __init__(self):
        super(Model, self).__init__()
        
        # Create a VGG16 network
        self.vgg16 = torch.hub.load('pytorch/vision:v0.6.0', 'vgg16', pretrained=False)
        
        # There are 16 keypoints to detect, each keypoint having 3 atributtes:
        # 1. x coordinate
        # 2. y coordinate
        # 3. a "state" (visible or not) A state of 0 means the joint either does not 
        #   exist or is outside of the image's bounds, 1 denotes a joint that is inside 
        #   of the image but cannot be seen because the part of the object it belongs 
        #   to is not visible in the image, and 2 means the joint was present and visible.
        #   (TODO: this should be one-hot encoded or use embeddings instead of a single number)
        num_out_features = 16 * 3

        # Replace the last layer of the VGG16 network with a linear layer
        self.vgg16.classifier[-1] = torch.nn.Linear(in_features=4096, out_features=num_out_features, bias=True)

    def forward(self, x):

        y_pred = self.vgg16(x)
        return y_pred

In [3]:
model = Model()

Downloading: "https://github.com/pytorch/vision/zipball/v0.6.0" to /Users/tleyden/.cache/torch/hub/v0.6.0.zip


In [28]:
# Load an image from a file
img = Image.open("/Users/tleyden/Library/Application Support/DefaultCompany/TennisCourt/solo_3/sequence.0/step0.camera.png")

# Resize it to 224x224
#img = torchvision.transforms.functional.resize(img, (224, 224))

# convert it to a tensor
img_tensor = torchvision.transforms.functional.to_tensor(img)

print(img_tensor.shape)


torch.Size([4, 769, 1532])


In [20]:
model

Model(
  (vgg16): VGG(
    (features): Sequential(
      (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): ReLU(inplace=True)
      (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (3): ReLU(inplace=True)
      (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
      (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (6): ReLU(inplace=True)
      (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (8): ReLU(inplace=True)
      (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
      (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (11): ReLU(inplace=True)
      (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (13): ReLU(inplace=True)
      (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (15): ReLU(inplace=True)
      (16): Ma

In [29]:
# Get rid of the alpha channel dimension
# Old shape: torch.Size([4, 769, 1532])  (channels, height, width)
# New shape: torch.Size([3, 769, 1532])
img_tensor = img_tensor[:3, :, :]
img_tensor.shape

torch.Size([3, 769, 1532])

In [30]:
# Add a "batch" dimension as expected by the model
batch = img_tensor.unsqueeze(0)
batch.shape

torch.Size([1, 3, 769, 1532])

In [31]:
model(batch)

tensor([[-0.0102,  0.0059,  0.0053,  0.0172,  0.0196,  0.0097, -0.0121, -0.0184,
         -0.0108,  0.0036,  0.0139, -0.0121,  0.0245, -0.0017,  0.0038,  0.0042,
          0.0074,  0.0231,  0.0197,  0.0035, -0.0039,  0.0002,  0.0054, -0.0017,
          0.0003, -0.0544, -0.0077,  0.0041, -0.0054, -0.0004, -0.0215, -0.0020,
         -0.0051,  0.0042,  0.0226, -0.0271,  0.0246, -0.0182,  0.0157, -0.0147,
          0.0147,  0.0079,  0.0239,  0.0147, -0.0066, -0.0221, -0.0007, -0.0219]],
       grad_fn=<AddmmBackward0>)

In [21]:
16 * 3

48

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision.transforms import ToTensor, Normalize
from torch.utils.data import DataLoader

In [2]:
loss = nn.CrossEntropyLoss()

In [3]:
input = torch.randn(3, 5, requires_grad=True)

In [14]:
input

tensor([[ 1.6629,  0.2976, -0.1444,  1.2981,  0.8015],
        [-0.4406,  0.8616, -1.6612,  0.0551,  1.6870],
        [ 0.1893, -1.2243,  0.4126, -0.3632,  0.1342]], requires_grad=True)

In [15]:
target = torch.empty(3, dtype=torch.long).random_(5)

In [19]:
target[0] = 0
target

tensor([0, 4, 2])

In [20]:
output = loss(input, target)
output

tensor(0.8929, grad_fn=<NllLossBackward0>)

In [11]:
output.backward()

In [13]:
input.grad

tensor([[ 0.1314, -0.2998,  0.0216,  0.0913,  0.0555],
        [-0.3111,  0.0817,  0.0066,  0.0365,  0.1864],
        [ 0.0830,  0.0202,  0.1038,  0.0478, -0.2548]])

In [26]:
input2 = torch.zeros(1, 5)
input2[0][0] = 1.0
input2

tensor([[1., 0., 0., 0., 0.]])

In [29]:
target = torch.tensor([0], dtype=torch.long)

In [30]:
output = loss(input2, target)
output

tensor(0.9048)

In [40]:
#y_pred = torch.tensor([[0.05, 0.95, 0], [0.1, 0.8, 0.1]])
y_pred = torch.tensor([[0.0, 10000000.0, 0.0], [0.0, 100000000000.0, 0.0]])

y_true = torch.tensor([1, 1])  
#nn.NLLLoss()(torch.log(y_pred), y_true)  
loss(y_pred, y_true)

tensor(0.)

In [35]:
help(nn.NLLLoss)

Help on class NLLLoss in module torch.nn.modules.loss:

class NLLLoss(_WeightedLoss)
 |  NLLLoss(weight: Optional[torch.Tensor] = None, size_average=None, ignore_index: int = -100, reduce=None, reduction: str = 'mean') -> None
 |  
 |  The negative log likelihood loss. It is useful to train a classification
 |  problem with `C` classes.
 |  
 |  If provided, the optional argument :attr:`weight` should be a 1D Tensor assigning
 |  weight to each of the classes. This is particularly useful when you have an
 |  unbalanced training set.
 |  
 |  The `input` given through a forward call is expected to contain
 |  log-probabilities of each class. `input` has to be a Tensor of size either
 |  :math:`(minibatch, C)` or :math:`(minibatch, C, d_1, d_2, ..., d_K)`
 |  with :math:`K \geq 1` for the `K`-dimensional case. The latter is useful for
 |  higher dimension inputs, such as computing NLL loss per-pixel for 2D images.
 |  
 |  Obtaining log-probabilities in a neural network is easily achieve

In [41]:
def myfun():
    stuff = ["a", "b"]
    return *stuff

a, b = myfun()
print("{} {}".format(a, b))

a b


In [44]:
labels = torch.tensor([0, 1, 2, 0], dtype=torch.long)
labels

tensor([0, 1, 2, 0])

In [45]:
labels.dtype


torch.int64

In [46]:
torch.random(2, 5)

TypeError: 'module' object is not callable

In [54]:
t = torch.randn(48)

In [55]:
u = t.view(-1, 3)

In [56]:
u.shape

torch.Size([16, 3])

In [57]:
u

tensor([[ 1.1213, -0.4936, -1.5684],
        [-0.3683, -1.2497,  0.7882],
        [-1.3564, -1.3743, -0.8679],
        [-0.6571,  0.9354,  0.1843],
        [ 0.2186,  0.3012, -0.8825],
        [-0.0624,  0.7111,  0.1023],
        [ 1.2326,  0.3668, -0.6789],
        [-0.1812, -1.0728,  0.3258],
        [ 0.0730, -0.3137, -0.3616],
        [-1.1036, -0.0938,  1.1916],
        [-0.1156, -0.4488, -0.5394],
        [-2.2633,  1.1004, -0.0695],
        [ 1.7269,  0.5774, -1.2874],
        [-0.4169,  0.0701,  0.0324],
        [-1.1299,  0.3667,  1.6003],
        [ 0.3772,  1.4335, -1.7017]])

In [61]:
x = torch.randn(32, 16)
x.shape

torch.Size([32, 16])

In [62]:
y = torch.randn(32, 48)
y.shape

torch.Size([32, 48])

In [64]:
y_reshaped = y.view(-1, 16, 3)
y_reshaped.shape

torch.Size([32, 16, 3])

In [66]:
targets = torch.randn(32, 16)
targets.shape


torch.Size([32, 16])

In [None]:
image = torch.randn(32, 16)

## How does cross-entropy loss work when things don't line up?

# Suppose we have two types categories we want to predict:
# Car Type: [Sports, Truck, Sedan]
# Car Genre: [Humble, Normal, Primadonna]

# Sample inputs 
inputs = torch.tensor(
    []
)