In [1]:
import torch

In [13]:
import numpy as np

In [3]:
torch.backends.mps.is_available()

True

In [8]:
torch.backends.mps

<module 'torch.backends.mps' from '/opt/anaconda3/envs/Advanced_AI/lib/python3.10/site-packages/torch/backends/mps/__init__.py'>

## 1. Pytorch tensor
tensor is similar to ndarray in numpy, but can be used on GPU for accelerated computation.

You can make pytorch tensor with tuple, list or ndarray.
But if you're prototyping or just trying things out, list and tuple are great, when you're **scaling up or converting between libraries, ndarrays are your friend**.

In [10]:
torch.tensor([6,2])

tensor([6, 2])

In [11]:
torch.tensor((6,2))

tensor([6, 2])

In [14]:
torch.tensor(np.array([1,2,3]))

tensor([1, 2, 3])

Quick methods to create tensor, like the routine methods in numpy: ones, zeros, full, eye, random.....

In [15]:
#2rows and 2 columns of random numbers
torch.rand((2,3)) 

tensor([[0.6186, 0.1565, 0.7201],
        [0.8034, 0.7467, 0.1416]])

In [16]:
torch.rand(2,3)

tensor([[0.3854, 0.0727, 0.4925],
        [0.4008, 0.7764, 0.4998]])

In [17]:
#normal distribution, mean 0 and standard deviation 1.
torch.randn(2,3)

tensor([[-0.1616, -0.5160, -0.6437],
        [ 1.8353, -0.3077,  1.1880]])

In [20]:
#specify mean and sd of a normal distribution
mean = torch.tensor([4.0])
std = torch.tensor([0.5])
torch.normal(mean, std)

tensor([4.6131])

In [21]:
mean = torch.full((2, 3), 4.0)   # shape [2, 3] with all values = 4.0
std = torch.full((2, 3), 0.5)    # shape [2, 3] with all values = 0.5
torch.normal(mean, std)

tensor([[4.1373, 4.2235, 3.0460],
        [4.2595, 4.0126, 4.0550]])

In [23]:
#generate zeros:
torch.zeros(2,3)

tensor([[0., 0., 0.],
        [0., 0., 0.]])

In [25]:
#generat oens:
torch.ones(2,3)

tensor([[1., 1., 1.],
        [1., 1., 1.]])

In [26]:
#shape of tensor
x = torch.ones(2,3,4)
x.shape

torch.Size([2, 3, 4])

In [27]:
x.size()

torch.Size([2, 3, 4])

In [29]:
x.size(-1)

4

## 2. Basic data type in tensor
- torch.float32
- torch.float64
- torch.int16
- torch.int32
- torch.int64

In [33]:
#we can assign datatype when creating a tensor
torch.tensor([2.2,3.1,4.2],dtype=torch.int32)

tensor([2, 3, 4], dtype=torch.int32)

In [34]:
torch.tensor([3,36,2],dtype=torch.float32)

tensor([ 3., 36.,  2.])

In [39]:
a = np.random.randn(2,3)
a

array([[ 0.03003491,  0.445328  ,  1.63826447],
       [ 1.82780283,  0.15699914, -1.12034901]])

In [41]:
#tensor and ndarray can be converted very conveniently
torch.from_numpy(a)

tensor([[ 0.0300,  0.4453,  1.6383],
        [ 1.8278,  0.1570, -1.1203]], dtype=torch.float64)

In [42]:
torch.from_numpy(a).numpy()

array([[ 0.03003491,  0.445328  ,  1.63826447],
       [ 1.82780283,  0.15699914, -1.12034901]])

## 3. tensor calculation

In [43]:
#very similar to ndarray calculation

In [44]:
t= torch.ones(2,3)
t

tensor([[1., 1., 1.],
        [1., 1., 1.]])

In [45]:
t+3

tensor([[4., 4., 4.],
        [4., 4., 4.]])

In [47]:
torch.add(t,3.3)

tensor([[4.3000, 4.3000, 4.3000],
        [4.3000, 4.3000, 4.3000]])

In [48]:
s = torch.randn(2,3)
s

tensor([[-0.2800, -0.9281,  2.2013],
        [-0.2760, -0.3447,  0.8974]])

In [50]:
#to add, 2 tensors must have same dimensions
t+s

tensor([[0.7200, 0.0719, 3.2013],
        [0.7240, 0.6553, 1.8974]])

In [54]:
t.add(s)

tensor([[0.7200, 0.0719, 3.2013],
        [0.7240, 0.6553, 1.8974]])

In [55]:
t

tensor([[1., 1., 1.],
        [1., 1., 1.]])

In [57]:
#use add_ to make the result inplace
t.add_(s)

tensor([[0.7200, 0.0719, 3.2013],
        [0.7240, 0.6553, 1.8974]])

In [58]:
t

tensor([[0.7200, 0.0719, 3.2013],
        [0.7240, 0.6553, 1.8974]])

In [63]:
#add a row
t = torch.cat((t, torch.tensor([[1.2, 0.5, 3.3]])), dim=0)
t

tensor([[0.7200, 0.0719, 3.2013],
        [0.7240, 0.6553, 1.8974],
        [1.2000, 0.5000, 3.3000]])

In [64]:
#get inverse matrix
torch.linalg.inv(t)

tensor([[-2.4638, -2.7675,  3.9814],
        [ 0.2280,  2.9749, -1.9317],
        [ 0.8614,  0.5556, -0.8521]])

In [66]:
#reshape the tensor
t.reshape(1,9)

tensor([[0.7200, 0.0719, 3.2013, 0.7240, 0.6553, 1.8974, 1.2000, 0.5000, 3.3000]])

In [68]:
t.view(1,9)

tensor([[0.7200, 0.0719, 3.2013, 0.7240, 0.6553, 1.8974, 1.2000, 0.5000, 3.3000]])

In [69]:
#group calculation
t.sum()

tensor(12.2699)

In [72]:
#sum by column
t.sum(dim=1)

tensor([3.9932, 3.2767, 5.0000])

In [71]:
#sum by row
t.sum(dim=0)

tensor([2.6440, 1.2272, 8.3987])

In [76]:
#one number is scaler in tensor, a list in tensor is a vector
#get the scaler in a vector
t[1,1]

tensor(0.6553)

In [77]:
t[1:,1]

tensor([0.6553, 0.5000])

In [78]:
#get 2 vectors from a high dimension tensor
y = torch.rand(32,224,224,3)
y

tensor([[[[2.0061e-01, 7.9530e-01, 7.3930e-01],
          [2.4570e-01, 2.2051e-01, 5.8009e-01],
          [4.9909e-01, 3.4959e-01, 5.9674e-01],
          ...,
          [1.6430e-01, 7.9924e-01, 7.1956e-01],
          [4.0765e-01, 2.3549e-01, 1.6730e-01],
          [8.7160e-01, 4.0136e-01, 2.9066e-01]],

         [[4.3076e-01, 3.5878e-01, 5.1189e-01],
          [9.5760e-01, 3.7720e-01, 2.4108e-01],
          [3.3484e-01, 4.0078e-01, 6.3168e-03],
          ...,
          [2.9728e-01, 2.0507e-01, 9.5897e-01],
          [7.0334e-01, 1.8418e-01, 7.0403e-01],
          [4.6337e-01, 4.0884e-01, 4.7130e-01]],

         [[2.8847e-01, 2.6105e-01, 6.7080e-02],
          [6.1003e-01, 7.0735e-01, 2.3956e-01],
          [4.6404e-01, 6.1810e-01, 1.3958e-01],
          ...,
          [6.6332e-01, 1.1271e-01, 9.0695e-01],
          [6.1870e-02, 3.9535e-01, 7.5297e-01],
          [9.3660e-01, 8.4075e-01, 5.0868e-01]],

         ...,

         [[7.4214e-01, 7.7396e-01, 5.6260e-01],
          [2.5839e-01,

In [84]:
#get the first row only 
y[0,:,:,0]

tensor([[0.2006, 0.2457, 0.4991,  ..., 0.1643, 0.4076, 0.8716],
        [0.4308, 0.9576, 0.3348,  ..., 0.2973, 0.7033, 0.4634],
        [0.2885, 0.6100, 0.4640,  ..., 0.6633, 0.0619, 0.9366],
        ...,
        [0.7421, 0.2584, 0.9152,  ..., 0.1500, 0.1936, 0.1340],
        [0.2856, 0.6538, 0.4895,  ..., 0.7398, 0.5841, 0.4900],
        [0.0089, 0.8250, 0.8739,  ..., 0.6813, 0.8600, 0.1329]])

In [85]:
z = torch.rand(3,4)
z

tensor([[0.0395, 0.4420, 0.9425, 0.7827],
        [0.0407, 0.3778, 0.5099, 0.4515],
        [0.4273, 0.7993, 0.7548, 0.3881]])

In [88]:
z[0:2,:]

tensor([[0.0395, 0.4420, 0.9425, 0.7827],
        [0.0407, 0.3778, 0.5099, 0.4515]])

In [89]:
#matrix multipilcation

In [90]:
print(t.shape, z.shape)

torch.Size([3, 3]) torch.Size([3, 4])


In [92]:
torch.matmul(t,z)

tensor([[1.3994, 2.9042, 3.1315, 1.8384],
        [0.8661, 2.0842, 2.4486, 1.5989],
        [1.4779, 3.3570, 3.8767, 2.4457]])

In [93]:
t@z

tensor([[1.3994, 2.9042, 3.1315, 1.8384],
        [0.8661, 2.0842, 2.4486, 1.5989],
        [1.4779, 3.3570, 3.8767, 2.4457]])

In [94]:
#dot only applies to one dimensional vectors
t[0,:]

tensor([0.7200, 0.0719, 3.2013])

In [95]:
z[:,0]

tensor([0.0395, 0.0407, 0.4273])

In [96]:
t[0,:].dot(z[:,0])

tensor(1.3994)

## 4. Auto-differentiation

In [125]:
#allow to track x's grad/differentiation
x = torch.ones(2, 2, requires_grad=True)
y = x**2 + 2
y

tensor([[3., 3.],
        [3., 3.]], grad_fn=<AddBackward0>)

In [126]:
x.requires_grad

True

In [127]:
y.requires_grad

True

In [128]:
z= torch.ones(2,3)
z.requires_grad

False

In [129]:
x

tensor([[1., 1.],
        [1., 1.]], requires_grad=True)

In [130]:
y

tensor([[3., 3.],
        [3., 3.]], grad_fn=<AddBackward0>)

In [131]:
#the target of differentiation must be scalar, not vector
y.max().backward()

In [132]:
#each value of x changes, how much y.max() will be affected, it is (1/4)*y'(x) = (1/4)*2x = 0.5x
x.grad

tensor([[0.5000, 0.5000],
        [0.5000, 0.5000]])

This auto differentiation is very convenient for back propagation.

In [169]:
x = torch.ones(2, 2, requires_grad=True)
y = x + 2
# y is not leaf node as x, to get differentiation, we must retain the grad of y
y.retain_grad()
z=y*y*3

In [170]:
z

tensor([[27., 27.],
        [27., 27.]], grad_fn=<MulBackward0>)

In [171]:
z.mean().backward()

In [172]:
print(y.grad) 

tensor([[4.5000, 4.5000],
        [4.5000, 4.5000]])


In [173]:
x.grad

tensor([[4.5000, 4.5000],
        [4.5000, 4.5000]])

In [175]:
# use 'with torch.no_grad()' not to have differentiation temporarily
with torch.no_grad():
    print((x**2).requires_grad)

False


In [178]:
x = torch.ones(2, 2, requires_grad=True)
#detach() will also disable the permit to differentiate when generating a new tensor
y = (x + 2).detach()
y.requires_grad

False

In [180]:
a = torch.randn(2,2)
#change a's permit to differentiate on the go
a.requires_grad_(True)

tensor([[ 1.9235,  1.3625],
        [-0.1835,  1.1163]], requires_grad=True)