In [42]:
import torch

Tensors are similar to numpy arrays except that they can also be used on a GPU to accelerate training.

In [5]:
x = torch.empty(5,3) # an uninitialized matrix (whatever is in that memory at allocation time will be shown)
x
x.size()

torch.Size([5, 3])

In [6]:
x = torch.rand(5,3) # randomly initialized matrix
x

tensor([[0.8453, 0.8739, 0.6486],
        [0.6273, 0.7985, 0.3823],
        [0.7679, 0.6167, 0.5071],
        [0.0537, 0.6804, 0.0287],
        [0.1253, 0.7947, 0.4586]])

In [9]:
x = torch.zeros(5,3, dtype=torch.float) # matrix of 0s of type float
x

tensor([[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]])

In [3]:
x = torch.tensor([[5,3,1],[2,1,3],[2,3,4],[10,2,4],[1,2,3]])
x

tensor([[ 5,  3,  1],
        [ 2,  1,  3],
        [ 2,  3,  4],
        [10,  2,  4],
        [ 1,  2,  3]])

In [4]:
print(x.size())

torch.Size([5, 3])


In [13]:
len(x.size())

2

In [6]:
x.shape

torch.Size([5, 3])

Rank is the number of axes or dimensions of a tensor. Shape is the size of each axis of a tensor.

In [7]:
y = torch.rand(5,3)

In [8]:
print(y)

tensor([[0.8295, 0.7259, 0.2555],
        [0.4262, 0.6398, 0.5064],
        [0.7124, 0.1758, 0.4427],
        [0.5058, 0.4910, 0.8530],
        [0.2962, 0.0466, 0.9212]])


In [9]:
y.size(), y.shape

(torch.Size([5, 3]), torch.Size([5, 3]))

In [10]:
z = x+y; z

tensor([[5.7886e+22, 1.1866e+27, 5.0849e+31],
        [4.6114e+24, 1.8370e+25, 4.9640e+28],
        [4.6114e+24, 7.5244e+28, 6.7131e+22],
        [5.0578e-01, 7.7050e+31, 6.7415e+22],
        [3.0951e+32, 4.6557e-02, 7.6197e+31]])

In [11]:
print(x+y)

tensor([[5.7886e+22, 1.1866e+27, 5.0849e+31],
        [4.6114e+24, 1.8370e+25, 4.9640e+28],
        [4.6114e+24, 7.5244e+28, 6.7131e+22],
        [5.0578e-01, 7.7050e+31, 6.7415e+22],
        [3.0951e+32, 4.6557e-02, 7.6197e+31]])


In [12]:
print(torch.add(x,y))

tensor([[5.7886e+22, 1.1866e+27, 5.0849e+31],
        [4.6114e+24, 1.8370e+25, 4.9640e+28],
        [4.6114e+24, 7.5244e+28, 6.7131e+22],
        [5.0578e-01, 7.7050e+31, 6.7415e+22],
        [3.0951e+32, 4.6557e-02, 7.6197e+31]])


In [14]:
result = torch.empty(5,3)
torch.add(x, y, out=result); result

tensor([[5.7886e+22, 1.1866e+27, 5.0849e+31],
        [4.6114e+24, 1.8370e+25, 4.9640e+28],
        [4.6114e+24, 7.5244e+28, 6.7131e+22],
        [5.0578e-01, 7.7050e+31, 6.7415e+22],
        [3.0951e+32, 4.6557e-02, 7.6197e+31]])

In [15]:
y.add_(x); y

tensor([[5.7886e+22, 1.1866e+27, 5.0849e+31],
        [4.6114e+24, 1.8370e+25, 4.9640e+28],
        [4.6114e+24, 7.5244e+28, 6.7131e+22],
        [5.0578e-01, 7.7050e+31, 6.7415e+22],
        [3.0951e+32, 4.6557e-02, 7.6197e+31]])

In [30]:
print(x[:,1])

tensor([1.1866e+27, 1.8370e+25, 7.5244e+28, 7.7050e+31, 3.2603e-12])


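If you want to resize or reshape a tensor, you can use the `Tensor.view` method (for example `x.view(16)`); passing -1 for one dimension lets PyTorch infer its size from the others.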

In [34]:
x = torch.randn(4,4)
print(x.shape, x)
y = x.view(16)
print(y.shape, y)
z = x.view(-1,8)
print(z.shape, z)

torch.Size([4, 4]) tensor([[-1.7088,  0.5793, -0.4688, -0.1824],
        [-1.4856, -0.0107, -0.5985,  1.4128],
        [ 1.2690,  0.0769,  0.8904, -0.5708],
        [ 0.2742, -0.0532,  0.1620,  0.4353]])
torch.Size([16]) tensor([-1.7088,  0.5793, -0.4688, -0.1824, -1.4856, -0.0107, -0.5985,  1.4128,
         1.2690,  0.0769,  0.8904, -0.5708,  0.2742, -0.0532,  0.1620,  0.4353])
torch.Size([2, 8]) tensor([[-1.7088,  0.5793, -0.4688, -0.1824, -1.4856, -0.0107, -0.5985,  1.4128],
        [ 1.2690,  0.0769,  0.8904, -0.5708,  0.2742, -0.0532,  0.1620,  0.4353]])


In [35]:
x = torch.randn(1)
print(x)
print(x.item())

tensor([0.1828])
0.18279820680618286


In [36]:
a = torch.ones(5)
a

tensor([1., 1., 1., 1., 1.])

In [37]:
b = a.numpy()
b

array([1., 1., 1., 1., 1.], dtype=float32)

In [38]:
a.add_(1)

tensor([2., 2., 2., 2., 2.])

In [39]:
b

array([2., 2., 2., 2., 2.], dtype=float32)

In [40]:
import numpy as np
a = np.ones(5)
b = torch.from_numpy(a)
np.add(a, 1, out=a)
print(a)
print(b)

[2. 2. 2. 2. 2.]
tensor([2., 2., 2., 2., 2.], dtype=torch.float64)


In [41]:
if torch.cuda.is_available():
    device = torch.device("cuda")
    y = torch.ones_like(x, device=device)
    x = x.to(device)
    z = x + y
    print(z)
    print(z.to("cpu", torch.double))

GeForce RTX 2080 with CUDA capability sm_75 is not compatible with the current PyTorch installation.
The current PyTorch install supports CUDA capabilities sm_37 sm_50 sm_60 sm_61 sm_70 compute_37.
If you want to use the GeForce RTX 2080 GPU with PyTorch, please check the instructions at https://pytorch.org/get-started/locally/



tensor([1.1828], device='cuda:0')
tensor([1.1828], dtype=torch.float64)


In [44]:
print(x.grad_fn)

None


In [46]:
x = torch.ones(2,2,requires_grad=True)

In [47]:
print(x)

tensor([[1., 1.],
        [1., 1.]], requires_grad=True)


In [48]:
y = x + 2; y

tensor([[3., 3.],
        [3., 3.]], grad_fn=<AddBackward0>)

In [49]:
y.grad_fn

<AddBackward0 at 0x7f8d48dd5040>

In [50]:
z = 3 * y**2
out = z.mean()
print(z, out)

tensor([[27., 27.],
        [27., 27.]], grad_fn=<MulBackward0>) tensor(27., grad_fn=<MeanBackward0>)


In [51]:
print(z)

tensor([[27., 27.],
        [27., 27.]], grad_fn=<MulBackward0>)


In [52]:
print(out)

tensor(27., grad_fn=<MeanBackward0>)


out is a function of z, z is a function of y and y is a function of x. 

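out.backward() computes the gradient of out with respect to every leaf tensor that requires grad (here x) and stores it in x.grad. Autograd applies the chain rule through z and y, so the result is d(out)/dx rather than just d(out)/dz: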

out = 1/4 * sum(zi) = 1/4 * sum(3yi**2) = 1/4 * sum (3*(xi + 2)**2) over xi

d(out)/dxi = 1/4 * 3 * 2 * (xi + 2) = 3/2 * (xi + 2)

d(out)/dxi at xi = 1 = 9/2 = 4.5


In [53]:
out.backward()
print(x.grad)

tensor([[4.5000, 4.5000],
        [4.5000, 4.5000]])


In [64]:
x = torch.randn(3, requires_grad=True)
y = x * 2 
print(y.data.norm())

tensor(4.2492)


In [65]:
print(x)

tensor([ 1.8936, -0.8457,  0.4614], requires_grad=True)


In [66]:
print(y.data.norm())

tensor(4.2492)


In [67]:
print(y)

tensor([ 3.7872, -1.6914,  0.9229], grad_fn=<MulBackward0>)


In [68]:
print(y.data)

tensor([ 3.7872, -1.6914,  0.9229])


In [70]:
print(y.data.sum())

tensor(3.0187)


In [71]:
z = y**2
print(z.sum()/3)

tensor(6.0184, grad_fn=<DivBackward0>)


In [72]:
print(y.abs().sum())

tensor(6.4015, grad_fn=<SumBackward0>)


In [73]:
print(y.data.abs().sum())

tensor(6.4015)


In [74]:
print(y.data.norm())

tensor(4.2492)


In [76]:
from numpy.linalg import norm

In [78]:
z = y.detach()

In [79]:
print(norm(z.numpy()))

4.249159


In [80]:
z = (y**2).sum().sqrt()

In [81]:
print(z)

tensor(4.2492, grad_fn=<SqrtBackward>)


In [82]:
while y.data.norm() < 1000:
    y = y * 2

print(y)

tensor([ 969.5195, -432.9989,  236.2613], grad_fn=<MulBackward0>)


In [83]:
v = torch.tensor([0.1, 1.0, 0.0001], dtype=torch.float)
y.backward(v)

print(x.grad)

tensor([5.1200e+01, 5.1200e+02, 5.1200e-02])


In [84]:
print(x.requires_grad)
print((x**2).requires_grad)

True
True


In [85]:
with torch.no_grad():
    print((x**2).requires_grad)

False


In [86]:
y = x.detach()
print(y.requires_grad)

False


In [88]:
print(x.eq(y))

tensor([True, True, True])


In [89]:
a = torch.rand(4,3, requires_grad=True)

In [90]:
print(a)

tensor([[0.3904, 0.4011, 0.2205],
        [0.5616, 0.0634, 0.5357],
        [0.4688, 0.4647, 0.7372],
        [0.7134, 0.9872, 0.9314]], requires_grad=True)


In [91]:
b = a*5; print(b)

tensor([[1.9519, 2.0055, 1.1025],
        [2.8080, 0.3169, 2.6785],
        [2.3439, 2.3234, 3.6861],
        [3.5668, 4.9362, 4.6572]], grad_fn=<MulBackward0>)


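b is not a scalar (it is a 4x3 matrix), so b.backward() cannot be called without arguments. We must pass a tensor of the same shape as b, which weights each element of b when the gradients are computed.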

In [95]:
v = torch.ones_like(b, dtype=torch.float); print(v.size())

torch.Size([4, 3])


In [96]:
b.backward(v)

In [97]:
print(a.grad)

tensor([[5., 5., 5.],
        [5., 5., 5.],
        [5., 5., 5.],
        [5., 5., 5.]])


In [98]:
a = torch.tensor(3.); print(a)

tensor(3.)


In [99]:
a.shape

torch.Size([])

In [100]:
a.size()

torch.Size([])

In [101]:
len(a.shape)

0

In [102]:
time = torch.arange(0,20).float(); time

tensor([ 0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10., 11., 12., 13.,
        14., 15., 16., 17., 18., 19.])

In [103]:
speed = torch.randn(20)*3 + 0.5*(time-9.5)**2 + 1; speed 

tensor([45.9069, 37.8696, 24.0082, 21.3935, 16.0913, 12.8971,  6.2238,  3.4989,
         3.6581,  2.0035,  4.2657,  6.6305,  2.4293, 10.8488, 10.0749, 19.3579,
        22.3606, 28.8011, 32.8464, 49.0653])

In [104]:
def f(t, params):
    """Evaluate the quadratic a*t**2 + b*t + c.

    `params` must unpack into exactly three coefficients (a, b, c);
    `t` may be anything that supports `**`, `*` and `+` with them.
    """
    quad_coef, lin_coef, offset = params
    quadratic_term = quad_coef * (t ** 2)
    linear_term = lin_coef * t
    return quadratic_term + linear_term + offset

In [105]:
def mse(preds, targets):
    """Return the mean of the squared element-wise differences."""
    residuals = preds - targets
    squared = residuals ** 2
    return squared.mean()

In [107]:
params = torch.randn(3).requires_grad_(); params

tensor([-0.4833, -0.3321,  0.0554], requires_grad=True)

In [108]:
preds = f(time, params)

In [109]:
loss = mse(preds, speed); loss

tensor(10401.5020, grad_fn=<MeanBackward0>)

In [110]:
print(preds)

tensor([ 5.5370e-02, -7.6009e-01, -2.5422e+00, -5.2910e+00, -9.0064e+00,
        -1.3688e+01, -1.9337e+01, -2.5953e+01, -3.3535e+01, -4.2083e+01,
        -5.1599e+01, -6.2081e+01, -7.3530e+01, -8.5945e+01, -9.9327e+01,
        -1.1368e+02, -1.2899e+02, -1.4527e+02, -1.6252e+02, -1.8074e+02],
       grad_fn=<AddBackward0>)


In [111]:
len(loss.shape)

0

In [112]:
len(preds.shape)

1

In [113]:
loss.backward(); params.grad

tensor([-33764.8906,  -2173.4111,   -161.6050])

In [114]:
lr = 1e-5  # learning rate (step size) used for the manual gradient-descent update
# Step against the gradient; going through .data keeps this in-place update
# out of the autograd graph.
params.data -= lr * params.grad.data
# Drop the stored gradient so the next backward() starts from a clean slate
# instead of accumulating onto this one.
params.grad = None

In [115]:
preds = f(time, params); preds

tensor([ 5.6986e-02, -3.9909e-01, -1.1465e+00, -2.1853e+00, -3.5155e+00,
        -5.1370e+00, -7.0498e+00, -9.2541e+00, -1.1750e+01, -1.4537e+01,
        -1.7615e+01, -2.0985e+01, -2.4646e+01, -2.8598e+01, -3.2842e+01,
        -3.7377e+01, -4.2203e+01, -4.7321e+01, -5.2730e+01, -5.8431e+01],
       grad_fn=<AddBackward0>)

In [116]:
loss = mse(preds, speed); loss

tensor(2187.3979, grad_fn=<MeanBackward0>)

In [117]:
def apply_step(params, prn=True):
    """Run one gradient-descent step on `params` and return the predictions.

    Relies on notebook-level names: `time` (inputs), `speed` (targets),
    `lr` (learning rate), and the functions `f` and `mse`.

    Args:
        params: leaf tensor with requires_grad=True; it is updated in place.
        prn: when true, print the loss value computed before the update.

    Returns:
        The predictions made with the parameters as they were *before*
        this step's update.
    """
    preds = f(time, params)
    loss = mse(preds, speed)
    if prn: print(loss.item())
    loss.backward()
    # Update the leaf tensor in place with autograd tracking switched off.
    # This is the recommended replacement for poking at `.data`, which
    # bypasses autograd's in-place correctness checks.
    with torch.no_grad():
        params -= lr * params.grad
    # Clear the gradient so the next backward() does not accumulate onto it.
    params.grad = None
    return preds

In [118]:
for i in range(10): apply_step(params)

2187.39794921875
633.0361938476562
338.9017639160156
283.24078369140625
272.70623779296875
270.71099853515625
270.3316650390625
270.2581481933594
270.2424621582031
270.2376708984375


In [128]:
a = torch.randn((2*2,1)); a

tensor([[ 1.0270],
        [ 1.5007],
        [ 0.9591],
        [-0.4100]])

In [123]:
a*1

tensor([[-1.5576],
        [-0.6271],
        [ 1.0489],
        [-0.3268]])

In [125]:
b = torch.rand((2*2,1)); b

tensor([[0.3414],
        [0.6773],
        [0.2568],
        [0.4548]])

In [130]:
b = torch.randn(1); b

tensor([-0.9026])

In [131]:
a.shape, b.shape

(torch.Size([4, 1]), torch.Size([1]))

In [137]:
a.T

tensor([[ 1.0270,  1.5007,  0.9591, -0.4100]])

In [134]:
c = torch.randn(4); c

tensor([0.6270, 2.1091, 0.6473, 2.0235])

In [138]:
d = c*a.T; d

tensor([[ 0.6439,  3.1651,  0.6209, -0.8296]])

In [139]:
d.shape

torch.Size([1, 4])

In [140]:
0.6270*1.0270

0.643929

In [142]:
e = d.sum(); e

tensor(3.6003)

In [145]:
# NOTE(review): this rebinds the name `f`, shadowing the quadratic model
# function `f(t, params)` defined earlier; `apply_step` calls `f(time, params)`
# and would fail if run after this cell.
f = e + b; f

tensor([2.6977])

In [146]:
f.shape

torch.Size([1])

In [148]:
trgts = torch.tensor([1, 0, 1]); trgts

tensor([1, 0, 1])

In [149]:
trgts==1

tensor([ True, False,  True])

In [151]:
prds = torch.tensor([0.9, 0.4, 0.2]); prds

tensor([0.9000, 0.4000, 0.2000])

In [152]:
1-prds

tensor([0.1000, 0.6000, 0.8000])

In [153]:
help(torch.where)

Help on built-in function where:

where(...)
    where(condition, x, y) -> Tensor
    
    Return a tensor of elements selected from either :attr:`x` or :attr:`y`, depending on :attr:`condition`.
    
    The operation is defined as:
    
    .. math::
        \text{out}_i = \begin{cases}
            \text{x}_i & \text{if } \text{condition}_i \\
            \text{y}_i & \text{otherwise} \\
        \end{cases}
    
    .. note::
        The tensors :attr:`condition`, :attr:`x`, :attr:`y` must be :ref:`broadcastable <broadcasting-semantics>`.
    
    Arguments:
        condition (BoolTensor): When True (nonzero), yield x, otherwise yield y
        x (Tensor): values selected at indices where :attr:`condition` is ``True``
        y (Tensor): values selected at indices where :attr:`condition` is ``False``
    
    Returns:
        Tensor: A tensor of shape equal to the broadcasted shape of :attr:`condition`, :attr:`x`, :attr:`y`
    
    Example::
    
        >>> x = torch.randn(3, 2)
  

In [154]:
x = torch.randn(3,2); y = torch.ones(3,2); x

tensor([[ 0.3146,  0.2119],
        [-1.0202, -0.4158],
        [ 0.4631, -0.9585]])

In [155]:
torch.where(x>0, x, y)

tensor([[0.3146, 0.2119],
        [1.0000, 1.0000],
        [0.4631, 1.0000]])

In [156]:
torch.where(trgts==1, 1-prds, prds)

tensor([0.1000, 0.4000, 0.8000])

The expression above measures how far each prediction is from its target: when the target is 1 it uses 1 - prediction, and when the target is 0 it uses the prediction itself. Either way, the value is small when the prediction is close to its target, so the overall sum or mean is low. A low loss means the predictions are close to the targets they are trying to predict.

In [157]:
def mnist_loss(predictions, targets):
    """Mean distance between each prediction and its binary target.

    Where the target equals 1 the distance is `1 - prediction`; everywhere
    else it is the prediction itself. The mean over all elements is returned.
    """
    target_is_one = targets == 1
    distances = torch.where(target_is_one, 1 - predictions, predictions)
    return distances.mean()

In [158]:
def mnist_loss(predictions, targets):
    """Mean distance between sigmoid-squashed predictions and binary targets.

    The raw predictions are first passed through a sigmoid so they lie in
    (0, 1). Where the target equals 1 the distance is `1 - prediction`;
    everywhere else it is the prediction itself.

    Returns:
        A 0-dim tensor (the mean over all elements), so the result can be
        used directly as a loss with `loss.backward()`.
    """
    predictions = predictions.sigmoid()
    # Reduce to a scalar, matching the non-sigmoid version of this loss;
    # without the mean this would return one value per element.
    return torch.where(targets==1, 1-predictions, predictions).mean()