# PyTorch Library (Part 1)

Import these libraries and modules:

In [None]:
import torch
import torch.nn as nn
import numpy as np
import time

# NumPy Arrays and PyTorch Tensors

Tensors are VERY fundamental to deep learning models or neural networks in PyTorch. In many ways, they are very similar to NumPy arrays.

In [None]:
n = np.linspace(0, 10, 50)
t = torch.linspace(0, 10, 50)

In [5]:
t

tensor([ 0.0000,  0.2041,  0.4082,  0.6122,  0.8163,  1.0204,  1.2245,  1.4286,
         1.6327,  1.8367,  2.0408,  2.2449,  2.4490,  2.6531,  2.8571,  3.0612,
         3.2653,  3.4694,  3.6735,  3.8776,  4.0816,  4.2857,  4.4898,  4.6939,
         4.8980,  5.1020,  5.3061,  5.5102,  5.7143,  5.9184,  6.1224,  6.3265,
         6.5306,  6.7347,  6.9388,  7.1429,  7.3469,  7.5510,  7.7551,  7.9592,
         8.1633,  8.3673,  8.5714,  8.7755,  8.9796,  9.1837,  9.3878,  9.5918,
         9.7959, 10.0000])

In [6]:
n

array([ 0.        ,  0.20408163,  0.40816327,  0.6122449 ,  0.81632653,
        1.02040816,  1.2244898 ,  1.42857143,  1.63265306,  1.83673469,
        2.04081633,  2.24489796,  2.44897959,  2.65306122,  2.85714286,
        3.06122449,  3.26530612,  3.46938776,  3.67346939,  3.87755102,
        4.08163265,  4.28571429,  4.48979592,  4.69387755,  4.89795918,
        5.10204082,  5.30612245,  5.51020408,  5.71428571,  5.91836735,
        6.12244898,  6.32653061,  6.53061224,  6.73469388,  6.93877551,
        7.14285714,  7.34693878,  7.55102041,  7.75510204,  7.95918367,
        8.16326531,  8.36734694,  8.57142857,  8.7755102 ,  8.97959184,
        9.18367347,  9.3877551 ,  9.59183673,  9.79591837, 10.        ])

In [8]:
t.reshape(2, 5, 5)

tensor([[[ 0.0000,  0.2041,  0.4082,  0.6122,  0.8163],
         [ 1.0204,  1.2245,  1.4286,  1.6327,  1.8367],
         [ 2.0408,  2.2449,  2.4490,  2.6531,  2.8571],
         [ 3.0612,  3.2653,  3.4694,  3.6735,  3.8776],
         [ 4.0816,  4.2857,  4.4898,  4.6939,  4.8980]],

        [[ 5.1020,  5.3061,  5.5102,  5.7143,  5.9184],
         [ 6.1224,  6.3265,  6.5306,  6.7347,  6.9388],
         [ 7.1429,  7.3469,  7.5510,  7.7551,  7.9592],
         [ 8.1633,  8.3673,  8.5714,  8.7755,  8.9796],
         [ 9.1837,  9.3878,  9.5918,  9.7959, 10.0000]]])

In [9]:
n.reshape(2, 5, 5)

array([[[ 0.        ,  0.20408163,  0.40816327,  0.6122449 ,
          0.81632653],
        [ 1.02040816,  1.2244898 ,  1.42857143,  1.63265306,
          1.83673469],
        [ 2.04081633,  2.24489796,  2.44897959,  2.65306122,
          2.85714286],
        [ 3.06122449,  3.26530612,  3.46938776,  3.67346939,
          3.87755102],
        [ 4.08163265,  4.28571429,  4.48979592,  4.69387755,
          4.89795918]],

       [[ 5.10204082,  5.30612245,  5.51020408,  5.71428571,
          5.91836735],
        [ 6.12244898,  6.32653061,  6.53061224,  6.73469388,
          6.93877551],
        [ 7.14285714,  7.34693878,  7.55102041,  7.75510204,
          7.95918367],
        [ 8.16326531,  8.36734694,  8.57142857,  8.7755102 ,
          8.97959184],
        [ 9.18367347,  9.3877551 ,  9.59183673,  9.79591837,
         10.        ]]])

## General Broadcasting Rules

NumPy compares the shapes of arrays when doing operations on them, starting with the right-most dimensions and moving left. Two dimensions are compatible when they're equal or when one of them is 1.

For example, an array with this shape: (1, 2, 3, 4)
is compatible with an array with this shape: (5, 6, 7, 8)


In [12]:
a = np.ones((6, 5))
b = np.arange(5).reshape((1, 5))

In [14]:
a

array([[1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.]])

In [15]:
b

array([[0, 1, 2, 3, 4]])

In [13]:
a*b

array([[0., 1., 2., 3., 4.],
       [0., 1., 2., 3., 4.],
       [0., 1., 2., 3., 4.],
       [0., 1., 2., 3., 4.],
       [0., 1., 2., 3., 4.],
       [0., 1., 2., 3., 4.]])

In [16]:
a = torch.ones((6, 5))
b = torch.arange(5).reshape((1, 5))

In [17]:
a*b

tensor([[0., 1., 2., 3., 4.],
        [0., 1., 2., 3., 4.],
        [0., 1., 2., 3., 4.],
        [0., 1., 2., 3., 4.],
        [0., 1., 2., 3., 4.],
        [0., 1., 2., 3., 4.]])

### 1 Image

The arrays/tensors don't need to have the same # of dimensions for broadcasting.

Example: Scaling the color channels of an image by a different amount.

```python
Image (3D Array): 256 x 256 x 3

Scale (1D Array):             3

Result (3D Array): 256 x 256 x 3

In [None]:
image = torch.randn((256, 256, 3)) # 256 height, 256 width, 3 color channels.
scale = torch.tensor((0.5, 1.5, 1))
result = image*scale

In [19]:
image

tensor([[[ 1.0525e+00, -1.6660e+00,  3.9438e-01],
         [ 1.3701e+00,  8.0176e-01,  2.7599e-01],
         [ 4.1844e-01,  1.3582e+00,  3.1026e-01],
         ...,
         [-1.0780e+00,  1.2455e+00, -3.3096e-01],
         [ 1.0358e+00,  1.1786e+00,  1.2781e+00],
         [ 9.6587e-03,  1.8907e+00, -1.1445e+00]],

        [[ 8.2376e-01, -1.2966e+00, -2.9366e-01],
         [-9.5605e-01,  1.5134e+00, -4.1749e-01],
         [ 2.7789e-01,  3.5184e-01, -2.6870e-03],
         ...,
         [ 5.7974e-01, -7.9050e-01,  2.8802e+00],
         [-7.0645e-02,  6.2995e-01,  9.0971e-01],
         [ 1.5242e+00, -6.8651e-01, -6.5262e-01]],

        [[ 1.9966e-01,  3.2253e-01,  2.0364e-01],
         [-3.0004e-02,  4.6945e-01, -6.5543e-01],
         [ 2.5728e-01, -3.0045e-01,  9.3284e-01],
         ...,
         [-4.0095e-01,  3.7103e-01, -1.2680e+00],
         [ 9.1475e-01,  1.0439e-01, -1.1703e+00],
         [-1.4955e+00, -2.2228e+00, -4.8915e-01]],

        ...,

        [[ 4.4441e-01,  1.4840e-01, -6

In [15]:
result

tensor([[[ 0.0798,  1.7369,  0.6174],
         [-0.1516,  0.4027, -2.5925],
         [ 0.4811, -2.3056, -0.3415],
         ...,
         [-0.1669, -0.9883,  1.0752],
         [ 0.2563, -1.5196, -0.1932],
         [ 0.0388, -1.1467, -1.0058]],

        [[ 0.2866,  0.8284,  0.2383],
         [-0.3464, -1.2435, -0.6764],
         [-0.1164, -0.3575,  0.4215],
         ...,
         [ 0.7877, -1.1866,  0.6854],
         [ 0.2535, -1.5809,  1.9379],
         [ 0.2320, -0.9515, -0.7418]],

        [[ 0.0129, -1.8592,  0.9526],
         [-0.2234, -0.7306, -0.8008],
         [-0.7287,  2.3110, -0.7538],
         ...,
         [ 0.3423,  1.4453,  0.0218],
         [ 0.3247,  1.7774,  2.0419],
         [ 0.0248,  1.3605, -0.1211]],

        ...,

        [[ 0.3353, -0.0319,  0.4101],
         [-0.4515, -1.2659, -0.2063],
         [-0.2784, -1.1100, -1.5977],
         ...,
         [-0.5511,  1.2321, -0.5514],
         [ 0.2646, -1.2557,  2.9311],
         [ 0.2170,  2.5884, -0.5897]],

        [[

### 2 Images

The arrays/tensors don't need to have the same # of dimensions for broadcasting.

Example: Array of 2 images and wanting to scale the color channels of each image by a slightly different amount.

```python
Images (4D Array): 2 x 256 x 256 x 3

Scales (4D Array): 2 x 1 x 1 x 3

Results (4D Array): 2 x 256 x 256 x 3

In [None]:
images = torch.randn((2, 256, 256, 3)) # First number (2) represents the batch size of 2 images. 256 pixels in height, 256 pixels in width, and 3 color channels for RGB.
scales = torch.tensor([0.5, 1.5, 1, 1.5, 1, 0.5]).reshape((2, 1, 1, 3)) # 1x1 in reshape() allow the entire image to be broadcast to match the full image.

In [27]:
results = images * scales
results

tensor([[[[ 0.8522,  1.0923,  0.7599],
          [-1.0957, -2.8983,  0.1554],
          [-0.5626, -0.6021,  0.6851],
          ...,
          [ 0.2110, -0.5126, -0.6695],
          [-0.5228,  2.5122,  0.0654],
          [ 0.2978,  0.6578, -0.1609]],

         [[ 0.5921, -1.1434, -0.8861],
          [ 0.2508,  2.9169, -0.9600],
          [ 0.3760, -0.2648, -0.6563],
          ...,
          [ 1.2775, -0.7250, -1.7044],
          [ 0.1054, -0.1606, -2.1434],
          [ 0.8957,  2.0595, -0.6691]],

         [[-0.1917, -1.0970,  0.9026],
          [ 0.5043, -1.9590, -1.7855],
          [-0.3218,  1.4676,  0.6270],
          ...,
          [-0.4307,  0.4541, -0.6349],
          [-0.3854, -0.3487, -1.4607],
          [ 0.2175, -0.4314,  1.8270]],

         ...,

         [[-0.6955,  0.8256,  0.9564],
          [-0.0899, -3.1959,  0.0832],
          [ 0.2634, -0.1220,  0.3382],
          ...,
          [ 0.5373,  0.1287,  0.9361],
          [ 0.3760,  2.8989,  2.2514],
          [-0.9556,  0

# Operations Across Dimensions

Very fundamental operations in PyTorch that are similar to NumPy.

In [None]:
t = torch.tensor([0.5, 1, 3, 4])
torch.mean(t), torch.std(t), torch.max(t), torch.min(t) # Use .float() after the tensor if it consists of no floats as these functions often return decimals.

(tensor(2.1250), tensor(1.6520), tensor(4.), tensor(0.5000))

## 2D Tensors

For a 2D tensor, let's say we want to take the mean of the first column. Taking the mean of each column is the equivalent of taking the mean across the rows.

In [None]:
t = torch.arange(20, dtype = float).reshape(5, 4)
torch.mean(t, axis = 0) # axis = 0 evaluates the rows while axis = 1 evaluates the columns.

tensor([ 8.,  9., 10., 11.], dtype=torch.float64)

In [3]:
t

tensor([[ 0.,  1.,  2.,  3.],
        [ 4.,  5.,  6.,  7.],
        [ 8.,  9., 10., 11.],
        [12., 13., 14., 15.],
        [16., 17., 18., 19.]], dtype=torch.float64)

This is possible for higher dimensional arrays.

In [None]:
t = torch.randn(4, 256, 256, 3) # 4 images, each 256x256 pixels with 3 color channels.

In [22]:
torch.mean(t, axis = 0).shape # Mean across the batch of images when axis is 0; the first number in tensor t which is 4.

torch.Size([256, 256, 3])

In [23]:
torch.mean(t, axis = -1).shape # Takes mean of the color channels when axis is -1; the last number in the tensor which is 3.

torch.Size([4, 256, 256])

Another operation is taking the maximum color channel values from each image. This has a variety of applications in image segmentation.

In [27]:
values, indices = torch.max(t, axis = -1) # Returns the brightest values for each channel along with their positions.

In [38]:
indices # 0, 1, & 2 correspond to red, green, and blue.

tensor([[[0, 2, 0,  ..., 1, 0, 1],
         [2, 1, 0,  ..., 1, 1, 0],
         [2, 1, 0,  ..., 2, 1, 0],
         ...,
         [2, 2, 0,  ..., 1, 1, 2],
         [2, 0, 2,  ..., 0, 1, 2],
         [2, 1, 1,  ..., 1, 1, 1]],

        [[1, 1, 1,  ..., 2, 0, 0],
         [0, 2, 0,  ..., 2, 1, 1],
         [2, 1, 0,  ..., 2, 2, 0],
         ...,
         [2, 0, 1,  ..., 2, 0, 1],
         [2, 2, 0,  ..., 2, 2, 0],
         [0, 2, 0,  ..., 2, 1, 2]],

        [[1, 0, 2,  ..., 1, 1, 1],
         [1, 2, 1,  ..., 1, 1, 1],
         [1, 0, 2,  ..., 0, 2, 0],
         ...,
         [1, 2, 0,  ..., 2, 2, 2],
         [0, 2, 1,  ..., 2, 0, 2],
         [1, 1, 0,  ..., 2, 1, 0]],

        [[2, 1, 1,  ..., 2, 0, 2],
         [0, 2, 0,  ..., 1, 0, 1],
         [1, 1, 1,  ..., 0, 1, 1],
         ...,
         [2, 0, 2,  ..., 0, 0, 1],
         [1, 1, 2,  ..., 2, 2, 1],
         [0, 0, 0,  ..., 1, 1, 1]]])

In [40]:
values[0]

tensor([[ 1.3262e+00,  7.6990e-01,  2.1199e+00,  ...,  1.2491e+00,
         -2.3270e-01,  1.5231e+00],
        [-1.0046e+00,  5.7784e-01,  1.0310e+00,  ...,  1.5387e+00,
          9.0940e-01,  1.3376e+00],
        [ 5.4676e-01,  1.6488e-03,  1.6760e+00,  ..., -2.4037e-02,
          1.0200e+00,  5.1438e-01],
        ...,
        [ 7.4960e-01,  8.5924e-03,  1.6779e+00,  ..., -4.3419e-01,
          7.9431e-01,  1.0055e+00],
        [ 1.1981e+00,  3.0529e-01,  6.1339e-01,  ...,  2.9202e-01,
          1.5079e+00,  1.0418e+00],
        [ 4.3734e-01, -9.0478e-02,  1.3013e+00,  ...,  8.8235e-01,
          1.7419e+00,  1.1982e+00]])

# Differences Between NumPy and PyTorch

They compute gradients of operations differently.

$$y = \sum_{i} x_i^3$$

has a gradient of

$\frac{\partial y}{\partial x_i} = 3x_i^2 $

In [58]:
x = torch.tensor([[3., 5.], [8., 7.]], requires_grad = True) # With the requires_grad parameter, PyTorch remembers the gradient when operations are done on the tensor.

In [59]:
y = x.pow(3).sum() # .sum() is a special method for PyTorch as well as NumPy.
y

tensor(1007., grad_fn=<SumBackward0>)

In [None]:
y.backward() # Compute the gradient. Important for ML concepts such as gradient descent and backpropagation.
x.grad # Print the gradient attribute of x for each element in the tensor.

tensor([[ 27.,  75.],
        [192., 147.]])

In [None]:
3*x**2 # Check with the analytic derivative formula.

tensor([[ 27.,  75.],
        [192., 147.]], grad_fn=<MulBackward0>)

Computing gradients is very vital to how neural networks learn from their mistakes. Neural networks can be described as functions. But, they are more convoluted than the above example so with them, you will rarely use analytic formulas for computing the gradients so PyTorch is used instead. 

After that, a neural network adjusts certain parameters called weights as well as biases in order to minimize what is called the loss function. The loss function measures errors in a network's predictions, and must therefore be minimized in order for the network to give more accurate outputs.

# Additional Benefits

PyTorch does matrix multiplication with tensors much quicker than NumPy does with its arrays, and becomes even faster with the use of a GPU instead of a CPU. However, more memory is used up by tensors in order to do faster operations.

In [61]:
A = torch.randn((1000, 1000))
B = torch.randn((1000, 1000))

t1 = time.perf_counter()
torch.matmul(A, B)
t2 = time.perf_counter()
t2-t1

0.04854950000299141

In [None]:
A = np.random.randn(int(1e6)).reshape(1000, 1000)
B = np.random.randn(int(1e6)).reshape(1000, 1000)

t1 = time.perf_counter()
A@B
t2 = time.perf_counter()
t2-t1 # Time is greater for NumPy so it is slower.

0.07111910000094213