In [5]:
import torch
import torch.nn as nn

**TENSORS**

Tensors are a specialized data structure that are very similar to arrays and matrices. In PyTorch, we use tensors to encode the inputs and outputs of a model, as well as the model’s parameters.

Tensors are similar to NumPy’s ndarrays, except that tensors can run on GPUs or other hardware accelerators. In fact, tensors and NumPy arrays can often share the same underlying memory, eliminating the need to copy data (see Bridge with NumPy). Tensors are also optimized for automatic differentiation (we’ll see more about that later in the Autograd section). If you’re familiar with ndarrays, you’ll be right at home with the Tensor API. If not, follow along!

In [20]:
# Build a 3x2 integer tensor from a nested Python list (3 rows, 2 columns)
data = [[0, 1], [2, 3], [4, 5]]
x_python = torch.tensor(data)

# Display the resulting tensor
x_python

tensor([[0, 1],
        [2, 3],
        [4, 5]])

We can also call torch.tensor() with the optional dtype parameter, which will set the data type. Some useful datatypes to be familiar with are: torch.bool, torch.float, and torch.long.


In [None]:
# Passing dtype converts the integer data to 32-bit floats at construction time
x_float = torch.tensor(data, dtype=torch.float)
x_float



tensor([[0., 1.],
        [2., 3.],
        [4., 5.]])

In [None]:
# .float() returns a new float32 tensor; x_python itself keeps its integer dtype
x_python.float()

tensor([[0., 1.],
        [2., 3.],
        [4., 5.]])

We can also initialize a tensor from a NumPy array.

In [None]:
import numpy as np

# Initialize a tensor from a NumPy array
# torch.from_numpy does not copy: the tensor and the ndarray share memory,
# so an in-place change to one is visible in the other
ndarray = np.array(data)
x_numpy = torch.from_numpy(ndarray)

# Print the tensor
x_numpy

tensor([[0, 1],
        [2, 3],
        [4, 5]])

We can also initialize a tensor from another tensor, using the following methods:

**torch.ones_like(old_tensor)**: Initializes a tensor of 1s.
**torch.zeros_like(old_tensor)**: Initializes a tensor of 0s.
**torch.rand_like(old_tensor)**: Initializes a tensor where all the elements are sampled from a uniform distribution between 0 and 1.
**torch.randn_like(old_tensor)**: Initializes a tensor where all the elements are sampled from a normal distribution.
All of these methods preserve the tensor properties of the original tensor passed in, such as the shape and device, which we will cover in a bit.

In [None]:
# Initialize a base tensor (float literals produce a floating-point tensor)
x = torch.tensor([[1., 2.], [3., 4.]])
x

tensor([[1., 2.],
        [3., 4.]])

In [None]:
# Same shape, dtype and device as x, but every element is 0
x_zeros = torch.zeros_like(x)
x_zeros

tensor([[0., 0.],
        [0., 0.]])

In [None]:
# Initialize a tensor where each element is sampled from a uniform distribution
# between 0 and 1
# The values are random, so they change on every run unless a seed is set
x_rand = torch.rand_like(x)
x_rand

tensor([[0.5282, 0.0262],
        [0.7543, 0.9231]])

By Specifying a Shape
We can also instantiate tensors by specifying their shapes (which we will cover in more detail in a bit). The methods we could use follow the ones in the previous section:

torch.zeros()
torch.ones()
torch.rand()
torch.randn()

In [None]:
# Initialize a 4x2x2 tensor of 0s
shape = (4, 2, 2)
x_zeros = torch.zeros(shape) # x_zeros = torch.zeros(4, 2, 2) is an alternative
x_zeros


tensor([[[0., 0.],
         [0., 0.]],

        [[0., 0.],
         [0., 0.]],

        [[0., 0.],
         [0., 0.]],

        [[0., 0.],
         [0., 0.]]])

Tensor Properties
Data Type
The dtype property lets us see the data type of a tensor.
Shape
The shape property tells us the shape of our tensor. This can help us identify how many dimensions our tensor has as well as how many elements exist in each dimension.

In [None]:
# torch.ones creates floating-point values by default
x = torch.ones(3, 2)
print(x.dtype)

torch.float32


In [None]:
# Initialize a 3x2 tensor, with 3 rows and 2 columns
# torch.tensor() is the recommended factory for existing data; the float
# literals keep the floating-point dtype that the legacy torch.Tensor()
# constructor would have produced
x = torch.tensor([[1., 2.], [3., 4.], [5., 6.]])
print(x)
# shape is a torch.Size; shape[0] and size(0) both give the number of rows
print(x.shape)
print(x.shape[0])
print(x.size(0))

tensor([[1., 2.],
        [3., 4.],
        [5., 6.]])
torch.Size([3, 2])
3
3


In [None]:
#We can change the shape of a tensor with the view() method.
# Example use of view()
# x_view shares the same memory as x, so changing one changes the other
x_view = x.view(3, 2)
print(x_view)
# -1 lets PyTorch infer that dimension: 6 elements / 3 columns = 2 rows
x_view = x.view(-1, 3)
print(x_view)
# Get a 2x3 version of x (x itself keeps its 3x2 shape)
# x_reshaped could be a reference to or copy of x
x_reshaped = torch.reshape(x, (2, 3))
print(x_reshaped)

tensor([[1., 2.],
        [3., 4.],
        [5., 6.]])
tensor([[1., 2., 3.],
        [4., 5., 6.]])
tensor([[1., 2., 3.],
        [4., 5., 6.]])


In [None]:
# Initialize a 5x2 tensor, with 5 rows and 2 columns
x = torch.arange(10).reshape(5, 2)
print(x)

# Insert a new dimension of size 1 at index 1 (the second position): 5x2 -> 5x1x2
x = x.unsqueeze(1)
print(x.shape)

# Squeeze the dimensions of x by getting rid of all the dimensions with 1 element
x = x.squeeze()
print(x.shape)

tensor([[0, 1],
        [2, 3],
        [4, 5],
        [6, 7],
        [8, 9]])
torch.Size([5, 1, 2])
torch.Size([5, 2])


# **Device**
The device property tells PyTorch where to store our tensor. Where a tensor is stored determines which device, GPU or CPU, handles the computations involving it. We can find the device of a tensor with the device property.

In [None]:
# Create a small float tensor with torch.tensor(), the recommended factory for
# existing data (the float literals keep the dtype the legacy torch.Tensor()
# constructor produced). No device is given, so it is created on the CPU.
x = torch.tensor([[1., 2.], [3., 4.]])
x

tensor([[1., 2.],
        [3., 4.]])

In [None]:
# The device attribute reports where the tensor's data currently lives
x.device

device(type='cpu')

In [None]:
#We can move a tensor from one device to another with the method to(device).
# Check if a GPU is available, if so, move the tensor to the GPU
# Tensor.to() is not in-place: it returns the moved tensor, so the result must
# be assigned back, otherwise x silently stays on its original device
if torch.cuda.is_available():
  x = x.to('cuda')

**Tensor Indexing**
In PyTorch we can index tensors, similar to NumPy.

In [None]:
# Build a 3x2x2 float tensor: three 2x2 matrices stacked along dimension 0.
# torch.tensor() is the recommended factory for existing data; the float
# literals keep the dtype the legacy torch.Tensor() constructor produced.
x = torch.tensor([
                  [[1., 2.], [3., 4.]],
                  [[5., 6.], [7., 8.]],
                  [[9., 10.], [11., 12.]]
                 ])
print(x)
print(x.shape)
# Indexing only the first dimension returns the first 2x2 matrix
print(x[0])
# ':' keeps every entry of dimension 0; picks element [0, 0] of each matrix
print(x[:, 0, 0])
print(x[0, 0, 0].item()) #prints scalar value

tensor([[[ 1.,  2.],
         [ 3.,  4.]],

        [[ 5.,  6.],
         [ 7.,  8.]],

        [[ 9., 10.],
         [11., 12.]]])
torch.Size([3, 2, 2])
tensor([[1., 2.],
        [3., 4.]])
tensor([1., 5., 9.])
1.0


# **Operations**
PyTorch operations are very similar to those of **NumPy**. We can work with both scalars and other tensors.

In [None]:
# Start from a 3x2x2 tensor filled with ones
x = torch.ones(3, 2, 2)
print(x)
# A scalar operand is applied to every element of the tensor
print(x + 2)
print(x * 3)


tensor([[[1., 1.],
         [1., 1.]],

        [[1., 1.],
         [1., 1.]],

        [[1., 1.],
         [1., 1.]]])
tensor([[[3., 3.],
         [3., 3.]],

        [[3., 3.],
         [3., 3.]],

        [[3., 3.],
         [3., 3.]]])
tensor([[[3., 3.],
         [3., 3.]],

        [[3., 3.],
         [3., 3.]],

        [[3., 3.],
         [3., 3.]]])


In [None]:
# 4x3 matrix whose entries are all 6
a = torch.ones((4,3)) * 6
print(a)

# Length-3 vector whose entries are all 2
# (the stray bare `b` expression was removed: only the last expression of a
# cell is displayed, so it had no effect)
b = torch.ones(3) * 2

# Matrix-vector product: (4x3) @ (3,) -> (4,); each entry is 3 * (6 * 2) = 36
c = a @ b

print(c)
# size() and shape report the same torch.Size
print(c.size())
print(c.shape)

tensor([[6., 6., 6.],
        [6., 6., 6.],
        [6., 6., 6.],
        [6., 6., 6.]])
tensor([36., 36., 36., 36.])
torch.Size([4])
torch.Size([4])


In [None]:
#operations on mean median mode

# **Autograd**
PyTorch and other machine learning libraries are known for their automatic differentiation feature. That is, given that we have defined the set of operations that need to be performed, the framework itself can figure out how to compute the gradients. We can call the backward() method to ask PyTorch to calculate the gradients, which are then stored in the grad attribute.


In [35]:
# Create an example tensor
# requires_grad parameter tells PyTorch to store gradients
x = torch.tensor([2.], requires_grad=True)

print(x)
# Print the gradient if it is calculated
# Currently None because backward() has not been called yet
print(x.grad)


# Calculating the gradient of y with respect to x
y = x * x * 3 # 3x^2
print(y)
y.backward()
print(x.grad) # d(y)/d(x) = d(3x^2)/d(x) = 6x = 12

# Gradients accumulate: this second backward pass adds another 12 to x.grad,
# so the value printed below is 24 rather than 12
z = x * x * 3 # 3x^2
z.backward()
print(x.grad)

tensor([2.], requires_grad=True)
None
tensor([12.], grad_fn=<MulBackward0>)
tensor([12.])
tensor([24.])


Neural Network Module

So far we have looked into tensors, their properties and basic operations on tensors.

Moving forward, we will use predefined blocks in the torch.nn module of PyTorch. We will then put these blocks together to create complex networks.

In [21]:
import torch.nn as nn

In [22]:
# nn.Linear(20, 30) maps the last dimension of its input from 20 to 30 features
layer_1 = nn.Linear(20,30)
# A batch of 128 samples with 20 features each
# NOTE(review): the name `input` shadows the Python builtin of the same name
input = torch.randn(128,20)
output =layer_1(input)
# The batch dimension is preserved: 128x20 -> 128x30
print(output.size())

torch.Size([128, 30])


In [23]:
# Create the inputs
input = torch.ones(2,3,4)
print(input)
# Make a linear layer transforming N,*,H_in dimensional inputs to N,*,H_out
# dimensional outputs (here H_in=4 and H_out=2; the leading dimensions are kept)
linear = nn.Linear(4, 2)
linear_output = linear(input)
linear_output

tensor([[[1., 1., 1., 1.],
         [1., 1., 1., 1.],
         [1., 1., 1., 1.]],

        [[1., 1., 1., 1.],
         [1., 1., 1., 1.],
         [1., 1., 1., 1.]]])


tensor([[[-0.8355, -0.6406],
         [-0.8355, -0.6406],
         [-0.8355, -0.6406]],

        [[-0.8355, -0.6406],
         [-0.8355, -0.6406],
         [-0.8355, -0.6406]]], grad_fn=<AddBackward0>)

Other Module Layers

There are several other preconfigured layers in the nn module. Some commonly used examples are **nn.Conv2d**, **nn.ConvTranspose2d**, **nn.BatchNorm1d**, **nn.BatchNorm2d**, **nn.Upsample** and **nn.MaxPool2d** among many others. We will learn more about these as we progress in the course. For now, the only important thing to remember is that we can treat each of these layers as plug and play components: we will be providing the required dimensions and PyTorch will take care of setting them up.

In [None]:
# Display the output of the linear layer computed in the earlier cell
linear_output

Putting the Layers Together
So far we have seen that we can create layers and pass the output of one as the input of the next. Instead of creating intermediate tensors and passing them around, we can use nn.Sequential, which does exactly that.

In [None]:
# nn.Sequential applies its modules in order: Linear(4 -> 2), then Sigmoid
block = nn.Sequential(
    nn.Linear(4, 2),
    nn.Sigmoid()
)

input = torch.ones(2,3,4)
# The Sigmoid at the end squashes every output value into the range (0, 1)
output = block(input)
output

tensor([[[0.5708, 0.3248],
         [0.5708, 0.3248],
         [0.5708, 0.3248]],

        [[0.5708, 0.3248],
         [0.5708, 0.3248],
         [0.5708, 0.3248]]], grad_fn=<SigmoidBackward0>)

Custom Modules

In [8]:
class MultilayerPerceptron(nn.Module):
  """Two-layer perceptron built as a single nn.Sequential pipeline.

  The network maps input_size -> hidden_size -> input_size, applying a ReLU
  after the first linear layer and a Sigmoid to the final output.

  NOTE: the next cell defines a class with the same name using explicitly
  named layers; once that cell runs, it replaces this definition.

  Args:
    input_size: number of features in the last dimension of the input
      (the output has the same number of features).
    hidden_size: number of units in the hidden layer.
  """

  def __init__(self, input_size, hidden_size):
    # Initialise nn.Module first so that sub-modules can be registered
    super().__init__()

    # Keep the constructor arguments around for later inspection
    self.input_size = input_size
    self.hidden_size = hidden_size

    # Layers are listed in the order in which they are applied.
    # The attribute name `model` is arbitrary; any other name would work.
    layers = [
        nn.Linear(input_size, hidden_size),
        nn.ReLU(),
        nn.Linear(hidden_size, input_size),
        nn.Sigmoid(),
    ]
    self.model = nn.Sequential(*layers)

  def forward(self, x):
    """Run x through the sequential pipeline and return its output."""
    return self.model(x)

In [9]:
class MultilayerPerceptron(nn.Module):
  """Two-layer perceptron with each layer stored as its own attribute.

  The network maps input_size -> hidden_size -> input_size, applying a ReLU
  after the first linear layer and a Sigmoid to the final output.

  Args:
    input_size: number of features in the last dimension of the input
      (the output has the same number of features).
    hidden_size: number of units in the hidden layer.
  """

  def __init__(self, input_size, hidden_size):
    # Initialise nn.Module first so that sub-modules can be registered
    super().__init__()

    # Keep the constructor arguments around for later inspection
    self.input_size = input_size
    self.hidden_size = hidden_size

    # One attribute per layer, created in the order in which they are applied;
    # the attribute names become the prefixes of the parameter names
    self.linear = nn.Linear(input_size, hidden_size)
    self.relu = nn.ReLU()
    self.linear2 = nn.Linear(hidden_size, input_size)
    self.sigmoid = nn.Sigmoid()

  def forward(self, x):
    """Apply linear -> ReLU -> linear2 -> Sigmoid to x and return the result."""
    hidden = self.relu(self.linear(x))
    return self.sigmoid(self.linear2(hidden))

In [24]:
# A batch of 2 samples with 5 features each
input = torch.randn(2, 5)

# Create our model
model = MultilayerPerceptron(5, 3)

# Pass our input through our model
# Calling the module runs forward(); the output has the same 2x5 shape as the input
model(input)

tensor([[0.5821, 0.4073, 0.6553, 0.5739, 0.5269],
        [0.6351, 0.4251, 0.6537, 0.5203, 0.4405]], grad_fn=<SigmoidBackward0>)

In [25]:
# List every learnable parameter together with its name; each name is prefixed
# with the attribute that holds the layer
list(model.named_parameters())

[('linear.weight', Parameter containing:
  tensor([[-0.1863,  0.3020, -0.4289,  0.4208, -0.2215],
          [ 0.3623, -0.1560,  0.2063, -0.3043,  0.2970],
          [-0.4134,  0.3484, -0.3711, -0.3209,  0.1522]], requires_grad=True)),
 ('linear.bias', Parameter containing:
  tensor([-0.4461,  0.4108, -0.3960], requires_grad=True)),
 ('linear2.weight', Parameter containing:
  tensor([[ 0.2067, -0.4380,  0.5084],
          [ 0.1321,  0.0936,  0.0879],
          [ 0.1349,  0.5425, -0.5662],
          [-0.3167, -0.0051, -0.3521],
          [-0.5361, -0.1158,  0.3384]], requires_grad=True)),
 ('linear2.bias', Parameter containing:
  tensor([ 0.4122, -0.3925,  0.5426,  0.2989,  0.1292], requires_grad=True))]

In [28]:
#Optimization

import torch.optim as optim

# Create the y data
y = torch.ones(10, 5)

# Add some noise to our goal y to generate our x
# We want our model to predict our original data, despite the noise
x = y + torch.randn_like(y)
x

tensor([[ 2.6953,  1.0192,  1.9463, -0.1134,  1.0221],
        [ 0.3469,  1.0425,  0.8886,  0.5200,  2.8855],
        [ 0.5029,  0.2115,  0.5042,  0.1324,  0.7710],
        [ 0.5735,  1.1654,  1.2412,  0.3652,  1.9605],
        [ 1.8078,  1.7369,  0.9938,  1.7352,  0.1917],
        [ 0.6266, -0.9348, -0.9005,  0.0607,  2.0082],
        [ 0.1749, -1.0601,  1.4675,  2.7334,  0.5334],
        [ 1.5671, -0.7165,  0.7894, -0.5623,  1.6371],
        [ 1.4686,  0.7532,  0.4516, -0.5326,  0.3501],
        [-0.4979,  0.0860, -1.6576,  2.5388,  1.0513]])

In [29]:
# Instantiate the model
model = MultilayerPerceptron(5, 3)

# Define the optimizer
adam = optim.Adam(model.parameters(), lr=1e-1)

# Define loss using a predefined loss function
# BCELoss expects predictions in [0, 1], which the model's final Sigmoid provides
loss_function = nn.BCELoss()

# Calculate how our model is doing now
y_pred = model(x)
loss_function(y_pred, y).item()

0.8663741946220398

In [30]:
# Set the number of epochs, which determines the number of training iterations
n_epoch = 10 

for epoch in range(n_epoch):
  # Set the gradients to 0
  # (backward() accumulates into .grad, so stale gradients must be cleared first)
  adam.zero_grad()

  # Get the model predictions
  y_pred = model(x)

  # Get the loss
  loss = loss_function(y_pred, y)

  # Print stats
  print(f"Epoch {epoch}: traing loss: {loss}")

  # Compute the gradients
  loss.backward()

  # Take a step to optimize the weights
  adam.step()

Epoch 0: traing loss: 0.8663741946220398
Epoch 1: traing loss: 0.6335042715072632
Epoch 2: traing loss: 0.48572659492492676
Epoch 3: traing loss: 0.3596430718898773
Epoch 4: traing loss: 0.2563168406486511
Epoch 5: traing loss: 0.17937031388282776
Epoch 6: traing loss: 0.1238587275147438
Epoch 7: traing loss: 0.08251111209392548
Epoch 8: traing loss: 0.05386689305305481
Epoch 9: traing loss: 0.03442545607686043
