# PyTorch

PyTorch is a Python-based scientific computing package serving two broad purposes:

- A replacement for NumPy to use the power of GPUs and other accelerators.
- An automatic differentiation library that is useful to implement neural networks.

To install `pytorch` on your machine, follow the instructions here: https://pytorch.org/

Load the relevant libraries

In [None]:
import torch
import numpy as np

#### Creating tensors

In [None]:
# a tensor can be written out by hand from a Python list
t = torch.tensor([1.4, 2.2, 3.5])

# ... or it can be built from an existing numpy array
a  = np.random.random((10,1))             # source numpy array
t1 = torch.tensor(a)                      # copies a; dtype follows the numpy array
t2 = torch.tensor(a, dtype=torch.float32) # copies a; dtype is the one requested here
t3 = torch.Tensor(a)                      # copies a; dtype becomes float32 (standard pytorch dtype)
t4 = torch.as_tensor(a)                   # shares the data with a; dtype follows the numpy array
t5 = torch.from_numpy(a)                  # shares the data with a; dtype follows the numpy array

# show the numpy array first ...
print('a=',a)
print('a type:',a.dtype)

# ... then each tensor followed by its dtype
for _label, _tensor in (('t1=',t1), ('t2=',t2), ('t3=',t3), ('t4=',t4), ('t5=',t5)):
    print(_label,_tensor)
    print('data type:',_tensor.dtype)

a= [[0.56457933]
 [0.08321875]
 [0.2832025 ]
 [0.02460751]
 [0.8432173 ]
 [0.61016326]
 [0.94959666]
 [0.50300741]
 [0.67701491]
 [0.02432517]]
a type: float64
t1= tensor([[0.5646],
        [0.0832],
        [0.2832],
        [0.0246],
        [0.8432],
        [0.6102],
        [0.9496],
        [0.5030],
        [0.6770],
        [0.0243]], dtype=torch.float64)
data type: torch.float64
t2= tensor([[0.5646],
        [0.0832],
        [0.2832],
        [0.0246],
        [0.8432],
        [0.6102],
        [0.9496],
        [0.5030],
        [0.6770],
        [0.0243]])
data type: torch.float32
t3= tensor([[0.5646],
        [0.0832],
        [0.2832],
        [0.0246],
        [0.8432],
        [0.6102],
        [0.9496],
        [0.5030],
        [0.6770],
        [0.0243]])
data type: torch.float32
t4= tensor([[0.5646],
        [0.0832],
        [0.2832],
        [0.0246],
        [0.8432],
        [0.6102],
        [0.9496],
        [0.5030],
        [0.6770],
        [0.0243]], dtyp

In [None]:
# the opposite direction: torch tensor -> numpy array
t = torch.tensor([1.4, 2.2, 3.5])

# .numpy() hands back a numpy array holding the tensor values
n = t.numpy()
print('numpy array', n, 'type', n.dtype)

numpy array [1.4 2.2 3.5] type float32


#### Tensor properties

In [None]:
# inspect t2 (created in the "Creating tensors" cell): shape, dtype and device
for _label, _attribute in (
    ('t2 shape:', t2.shape),
    ('t2 type:', t2.dtype),
    ('device where t2 is allocated:', t2.device),
):
    print(_label, _attribute)

t2 shape: torch.Size([10, 1])
t2 type: torch.float32
device where t2 is allocated: cpu


#### Useful functions

In [None]:
# constructors for commonly needed matrices
a = torch.eye(2)      # 2x2 identity matrix: [[1.,0.],[0.,1.]]
b = torch.zeros(2,2)  # 2x2 matrix filled with zeros
c = torch.ones(2,2)   # 2x2 matrix filled with ones

# print each matrix with its name
for _label, _matrix in (('a =', a), ('b =', b), ('c =', c)):
    print(_label, _matrix)

# dtype of the three matrices
print(a.dtype, b.dtype, c.dtype)

a = tensor([[1., 0.],
        [0., 1.]])
b = tensor([[0., 0.],
        [0., 0.]])
c = tensor([[1., 1.],
        [1., 1.]])
torch.float32 torch.float32 torch.float32


#### Random numbers

In [None]:
# settings for this demo
seed = 1
num_random = 10
mu = 2.       # mean passed to torch.normal
sigma = 3.    # standard deviation passed to torch.normal

# fix the seed so the numbers drawn below are reproducible
torch.manual_seed(seed)

# num_random samples from torch.rand
t_uniform = torch.rand(num_random)
print(t_uniform)

# a (1, num_random) tensor of samples from torch.normal
t_normal = torch.normal(mu, sigma, size=(1, num_random))
print(t_normal)

tensor([0.7576, 0.2793, 0.4031, 0.7347, 0.0293, 0.7999, 0.3971, 0.7544, 0.5695,
        0.4388])
tensor([[ 4.5770,  4.1168,  0.9783, -1.8160, -1.5845,  2.0751, -0.2881,  6.1908,
          1.0265,  2.8636]])


#### More pytorch functions

In [None]:
# 3x3 example tensor containing positive, negative and zero entries
t = torch.tensor([[1.1, 2.1, 3.5],[-1.0, 0.0, 0.9],[3.1, -8.2, 7.2]], dtype=torch.float32)
print(t.shape)
print(t,'\n')

# element-wise comparisons with 0; each one yields a True/False mask shaped like t
for _header, _mask in (
    ('where t==0:', t.eq(0)),   # equal to 0
    ('where t>=0:', t.ge(0)),   # greater than or equal to 0
    ('where t>0:',  t.gt(0)),   # greater than 0
    ('where t<=0:', t.le(0)),   # less than or equal to 0
    ('where t<0:',  t.lt(0)),   # less than 0
):
    print(_header)
    print(_mask,'\n')

# frequently used reductions (label and result on the same line)
for _header, _result in (
    ('Total sum:', t.sum()),
    ('Sum along first axis:', t.sum(dim=0)),
    ('Mean value:', t.mean()),
    ('standard deviation:', t.std()),
    ('Maximum value:', t.max()),
    ('Maximum value along first axis:', t.max(dim=0)),           # returns both values and indices
    ('index of the maximum value in the tensor:', t.argmax()),   # index of the largest entry of t
):
    print(_header, _result)
print('transpose of tensor:')
print(t.t(),'\n')

# other element-wise operations and the product of all entries
for _header, _result in (
    ('abs(t):', t.abs()),          # absolute value
    ('sqrt(t):', t.sqrt()),        # square root; negative entries of t give nan
    ('-t:', t.neg()),              # sign-flipped copy of t
    ('t0*t1*t2*....:', t.prod()),  # product of all elements
):
    print(_header)
    print(_result,'\n')

# .item() extracts a plain Python number (NOTE: it works only for tensors with one element)
print('mean value (scalar):',t.mean().item())

torch.Size([3, 3])
tensor([[ 1.1000,  2.1000,  3.5000],
        [-1.0000,  0.0000,  0.9000],
        [ 3.1000, -8.2000,  7.2000]]) 

where t==0:
tensor([[False, False, False],
        [False,  True, False],
        [False, False, False]]) 

where t>=0:
tensor([[ True,  True,  True],
        [False,  True,  True],
        [ True, False,  True]]) 

where t>0:
tensor([[ True,  True,  True],
        [False, False,  True],
        [ True, False,  True]]) 

where t<=0:
tensor([[False, False, False],
        [ True,  True, False],
        [False,  True, False]]) 

where t<0:
tensor([[False, False, False],
        [ True, False, False],
        [False,  True, False]]) 

Total sum: tensor(8.7000)
Sum along first axis: tensor([ 3.2000, -6.1000, 11.6000])
Mean value: tensor(0.9667)
standard deviation: tensor(4.1827)
Maximum value: tensor(7.2000)
Maximum value along first axis: torch.return_types.max(
values=tensor([3.1000, 2.1000, 7.2000]),
indices=tensor([2, 0, 2]))
index of the maximum value in

#### Indexing and reshaping tensors

In [None]:
# random 10x3 tensor used throughout this cell
t = torch.rand((10,3))

# each entry pairs a heading with the tensor printed underneath it
for _heading, _shown in (
    ('Tensor t:', t),
    # numpy-style indexing: every row, column with index 2
    ('Third component of tensor t:', t[:,2]),
    # same 30 elements arranged as 5 rows of 6
    ('Reshaping tensor into 5x6 tensor:', t.reshape(5,6)),
    # -1 lets pytorch work out the size of that dimension
    ('Reshaping tensor into 1x30 tensor:', t.reshape(1,-1)),
):
    print(_heading)
    print(_shown,'\n')

# flatten into a single dimension
print('Flattening tensor:')
print(t.view(-1))

Tensor t:
tensor([[0.7242, 0.2094, 0.6845],
        [0.1917, 0.6557, 0.3600],
        [0.6072, 0.8516, 0.6257],
        [0.7929, 0.3663, 0.5092],
        [0.5924, 0.7022, 0.4002],
        [0.7222, 0.1637, 0.7718],
        [0.9940, 0.5772, 0.3789],
        [0.8765, 0.8261, 0.4247],
        [0.2286, 0.0068, 0.9782],
        [0.3179, 0.4056, 0.1151]]) 

Third component of tensor t:
tensor([0.6845, 0.3600, 0.6257, 0.5092, 0.4002, 0.7718, 0.3789, 0.4247, 0.9782,
        0.1151]) 

Reshaping tensor into 5x6 tensor:
tensor([[0.7242, 0.2094, 0.6845, 0.1917, 0.6557, 0.3600],
        [0.6072, 0.8516, 0.6257, 0.7929, 0.3663, 0.5092],
        [0.5924, 0.7022, 0.4002, 0.7222, 0.1637, 0.7718],
        [0.9940, 0.5772, 0.3789, 0.8765, 0.8261, 0.4247],
        [0.2286, 0.0068, 0.9782, 0.3179, 0.4056, 0.1151]]) 

Reshaping tensor into 1x30 tensor:
tensor([[0.7242, 0.2094, 0.6845, 0.1917, 0.6557, 0.3600, 0.6072, 0.8516, 0.6257,
         0.7929, 0.3663, 0.5092, 0.5924, 0.7022, 0.4002, 0.7222, 0.1637, 0.7

#### Broadcasting

In [None]:
# element-wise operations broadcast their operands automatically
# both tensors are created with the same dtype (float32) explicitly
t1 = torch.tensor([[1,1],[1,1]], dtype=torch.float32)
t2 = torch.tensor([2,4], dtype=torch.float32)

# operands followed by their element-wise sum and product
for _label, _value in (
    ('t1 =', t1),
    ('t2 =', t2),
    ('t1+t2 =', t1 + t2),
    ('t1*t2 =', t1 * t2),
):
    print(_label, _value)

# in the operations above t2 is expanded to the shape of t1;
# numpy can show the expanded array explicitly
t2_broadcasted = np.broadcast_to(t2.numpy(), t1.shape)
print(t2_broadcasted)

t1 = tensor([[1., 1.],
        [1., 1.]])
t2 = tensor([2., 4.])
t1+t2 = tensor([[3., 5.],
        [3., 5.]])
t1*t2 = tensor([[2., 4.],
        [2., 4.]])
[[2. 4.]
 [2. 4.]]


In [None]:
# Broadcasting only works when the two shapes agree in every non-singleton
# dimension, i.e. in every dimension whose size is different from one.
t1 = torch.tensor([[1,1,1],[1,1,1]], dtype=torch.float32)
t2 = torch.tensor([2,4], dtype=torch.float32)

# Here the non-singleton dimension does NOT match: t1 has shape (2, 3), t2 has shape (2,).
# Uncomment the next line to see the error raised by pytorch:
# print('t1+t2 =',t1 + t2)

# After transposing t1 to shape (3, 2) the non-singleton dimension matches.
# NOTE: the values printed after 't1.shape' and 't1+t2 =' belong to the transposed tensor t1.T.
_t1_transposed = t1.T
print('t1.T =', _t1_transposed)
print('t2 =', t2)
print('t1.shape', _t1_transposed.shape, 't2.shape', t2.shape)
print('t1+t2 =', _t1_transposed + t2)

t1.T = tensor([[1., 1.],
        [1., 1.],
        [1., 1.]])
t2 = tensor([2., 4.])
t1.shape torch.Size([3, 2]) t2.shape torch.Size([2])
t1+t2 = tensor([[3., 5.],
        [3., 5.],
        [3., 5.]])


#### Squeezing and unsqueezing

In [None]:
# create a tensor
t = torch.rand(10,3)
print(t)
print(t.shape)

# unsqueeze it (add an extra dimension of size one along the first axis)
t1 = t.unsqueeze(0) #the argument is the position at which the new axis is inserted
print(t1)
print(t1.shape)

# squeeze it (remove the size-one dimension along the first axis)
# NOTE: the out-of-place squeeze() is used on purpose. The trailing-underscore
# variant squeeze_() works in place: it would change t1 itself, so t1 would lose
# its leading axis too and t2 would merely be another name for t1.
t2 = t1.squeeze(0)
print(t2)
print(t2.shape)

tensor([[0.0640, 0.6681, 0.5514],
        [0.2704, 0.0460, 0.0156],
        [0.0896, 0.2860, 0.8514],
        [0.1860, 0.3764, 0.1275],
        [0.1657, 0.0880, 0.1324],
        [0.0609, 0.2640, 0.5648],
        [0.0995, 0.9622, 0.2611],
        [0.9181, 0.3376, 0.0313],
        [0.5563, 0.3957, 0.3443],
        [0.0338, 0.3193, 0.3338]])
torch.Size([10, 3])
tensor([[[0.0640, 0.6681, 0.5514],
         [0.2704, 0.0460, 0.0156],
         [0.0896, 0.2860, 0.8514],
         [0.1860, 0.3764, 0.1275],
         [0.1657, 0.0880, 0.1324],
         [0.0609, 0.2640, 0.5648],
         [0.0995, 0.9622, 0.2611],
         [0.9181, 0.3376, 0.0313],
         [0.5563, 0.3957, 0.3443],
         [0.0338, 0.3193, 0.3338]]])
torch.Size([1, 10, 3])
tensor([[0.0640, 0.6681, 0.5514],
        [0.2704, 0.0460, 0.0156],
        [0.0896, 0.2860, 0.8514],
        [0.1860, 0.3764, 0.1275],
        [0.1657, 0.0880, 0.1324],
        [0.0609, 0.2640, 0.5648],
        [0.0995, 0.9622, 0.2611],
        [0.9181, 0.3376, 0

#### Stack and concatenate tensors

In [None]:
# two 5x2 tensors that are easy to tell apart: all zeros and all ones
t1 = torch.zeros(5,2)
t2 = torch.ones(5,2)
for _label, _tensor in (('t1:', t1), ('t2:', t2)):
    print(_label, _tensor)

_pair = (t1, t2)

# stack: the two inputs are placed along a new leading dimension
t = torch.stack(_pair)
print('t1 t2 stacked:', t, 'shape', t.shape)

# cat: the two inputs are joined along an existing dimension (here dim 0)
t = torch.cat(_pair, dim=0)
print('t1 t2 concatenated:', t, 'shape', t.shape)

t1: tensor([[0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.]])
t2: tensor([[1., 1.],
        [1., 1.],
        [1., 1.],
        [1., 1.],
        [1., 1.]])
t1 t2 stacked: tensor([[[0., 0.],
         [0., 0.],
         [0., 0.],
         [0., 0.],
         [0., 0.]],

        [[1., 1.],
         [1., 1.],
         [1., 1.],
         [1., 1.],
         [1., 1.]]]) shape torch.Size([2, 5, 2])
t1 t2 concatenated: tensor([[0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [1., 1.],
        [1., 1.],
        [1., 1.],
        [1., 1.],
        [1., 1.]]) shape torch.Size([10, 2])


#### Move data to/from GPU from/to CPU
In order to enable the usage of a GPU:

Runtime -----> Change runtime type -----> Hardware accelerator -----> GPU

In [None]:
import torch
import numpy as np

# pick the device: the GPU when CUDA is available, the CPU otherwise
_cuda_found = torch.cuda.is_available()
print("CUDA Available" if _cuda_found else 'CUDA Not Available')
device = torch.device('cuda' if _cuda_found else 'cpu')

# start from float32 data generated with numpy (numpy arrays live on the CPU)
data = np.random.random((12,2,3)).astype(np.float32)

# numpy array -> torch tensor
data = torch.tensor(data)
print('data is located in:', data.device)

# tensor -> selected device
data = data.to(device)
print('data is located in:', data.device)

# tensor -> back to the CPU
data = data.cpu()
print('data is located in:', data.device)

# torch tensor -> numpy array
data = data.numpy()
print(data)

CUDA Available
data is located in: cpu
data is located in: cuda:0
data is located in: cpu
[[[0.10211503 0.38396102 0.8287932 ]
  [0.06607524 0.47719672 0.93415195]]

 [[0.07820678 0.07893415 0.34563246]
  [0.97677094 0.7854215  0.3837668 ]]

 [[0.6939686  0.5540304  0.7352282 ]
  [0.9159746  0.0769287  0.2910081 ]]

 [[0.77650595 0.9956301  0.11880538]
  [0.37620354 0.7485959  0.33982742]]

 [[0.33691338 0.24326767 0.83621645]
  [0.6818596  0.9254298  0.5633282 ]]

 [[0.54119277 0.27168715 0.2053981 ]
  [0.06946807 0.6055843  0.73311937]]

 [[0.6811511  0.41332123 0.7441994 ]
  [0.51792604 0.47063282 0.4662997 ]]

 [[0.2623345  0.52072644 0.0486535 ]
  [0.86208904 0.3234983  0.05580842]]

 [[0.2523877  0.4404661  0.9529301 ]
  [0.9354438  0.9198461  0.3507259 ]]

 [[0.43801883 0.34412053 0.76697403]
  [0.90787464 0.23190951 0.4072504 ]]

 [[0.7928627  0.7115617  0.82601154]
  [0.53435624 0.98163295 0.02476547]]

 [[0.28431007 0.6822642  0.47597733]
  [0.7105993  0.6914443  0.3180777 ]]

#### For backpropagation we need to keep the gradients








In [None]:
# requires_grad=True is set when creating the tensor
t = torch.tensor([1.0, 2.0, 3.0], requires_grad=True)

# the flag can be read back from the tensor (shown as the cell output)
t.requires_grad

True

### **Exercise**: Generate a mock training set as follows:
- $\mathbf{x}$: 100 random points following a uniform distribution between 0 and 5,
- $\mathbf{y}(x)$: 100 points following a Gaussian distribution with mean $5+3x$ and standard deviation $0.3$.
- Plot the data (import matplotlib.pyplot as plt).
- Put that data into a tensor and move it to the GPU.
- Compute the mean and the standard deviation on the GPU and print the results.