In [2]:
import os
import torch
import numpy as np
import matplotlib.pyplot as plt
import random

In [3]:
# Seed every random number generator used in this notebook so reruns give the same numbers.
seed = 42
# NOTE: Python reads PYTHONHASHSEED at interpreter start-up, so this assignment is
# only seen by processes launched from here on, not by the already-running kernel.
os.environ["PYTHONHASHSEED"] = str(seed)
torch.manual_seed(seed)
np.random.seed(seed)
random.seed(seed)

## Tensors

Tensors are a specialized data structure, very similar to arrays and matrices. In PyTorch, we use tensors to encode the inputs and outputs of a model, as well as the model’s parameters.

Tensors are similar to NumPy’s ndarrays, except that tensors can run on GPUs or other hardware accelerators. Tensors are also optimized for automatic differentiation (we’ll see more about that later in the Autograd section). 

In [4]:
# Build a tensor straight from a nested Python list; the dtype is inferred from the values.
data = [
    [1, 2],
    [3, 4],
]
x_data = torch.tensor(data)
x_data  # last expression of the cell, so the notebook displays it

tensor([[1, 2],
        [3, 4]])

In [5]:
# Same values, but routed through a NumPy array first. The tensor takes its dtype
# from the array (int32 in the recorded output below, unlike x_data).
np_array = np.array(data)
x_np = torch.from_numpy(np_array)
x_np

tensor([[1, 2],
        [3, 4]], dtype=torch.int32)

In [6]:
# Element-wise comparison: the values match even though the two tensors were built differently.
x_data == x_np

tensor([[True, True],
        [True, True]])

In [7]:
# New tensor of ones modelled on x_data; the output below is a 2x2 integer tensor like x_data.
x_ones = torch.ones_like(x_data)
x_ones

tensor([[1, 1],
        [1, 1]])

In [8]:
# Random tensor with x_data's shape; dtype is overridden to float because x_data holds integers.
x_rand = torch.rand_like(x_data, dtype=torch.float)
x_rand

tensor([[0.8823, 0.9150],
        [0.3829, 0.9593]])

In [9]:
# Create random, all-ones and all-zeros tensors from one shared shape tuple.
shape = (2, 3)
rand_tensor, ones_tensor, zeros_tensor = (
    torch.rand(shape),
    torch.ones(shape),
    torch.zeros(shape),
)
rand_tensor, ones_tensor, zeros_tensor

(tensor([[0.3904, 0.6009, 0.2566],
         [0.7936, 0.9408, 0.1332]]),
 tensor([[1., 1., 1.],
         [1., 1., 1.]]),
 tensor([[0., 0., 0.],
         [0., 0., 0.]]))

In [10]:
# Inspect the three basic attributes of a tensor: its shape, element dtype and device.
tensor = torch.rand(3,4)
tensor.shape, tensor.dtype, tensor.device

(torch.Size([3, 4]), torch.float32, device(type='cpu'))

### Operations on Tensors

Over 100 tensor operations — including arithmetic, linear algebra, matrix manipulation (transposing,
indexing, slicing), sampling, and more — are
comprehensively described [here](https://pytorch.org/docs/stable/torch.html).

In [29]:
# A 3x4 float matrix holding 0..11, used to illustrate NumPy-style indexing and slicing.
tensor = torch.arange(12, dtype=torch.float32).reshape(3, 4)
print(tensor)
print('First row: ', tensor[0, :])
print('First column: ', tensor[:, 0])
print('Last column:', tensor[:, -1])

tensor([[ 0.,  1.,  2.,  3.],
        [ 4.,  5.,  6.,  7.],
        [ 8.,  9., 10., 11.]])
First row:  tensor([0., 1., 2., 3.])
First column:  tensor([0., 4., 8.])
Last column: tensor([ 3.,  7., 11.])


In [30]:
# Stack three copies of `tensor` on top of each other (dim=0 grows the row count).
t1 = torch.cat([tensor] * 3, dim=0)
print(t1)

tensor([[ 0.,  1.,  2.,  3.],
        [ 4.,  5.,  6.,  7.],
        [ 8.,  9., 10., 11.],
        [ 0.,  1.,  2.,  3.],
        [ 4.,  5.,  6.,  7.],
        [ 8.,  9., 10., 11.],
        [ 0.,  1.,  2.,  3.],
        [ 4.,  5.,  6.,  7.],
        [ 8.,  9., 10., 11.]])


In [31]:
# Matrix product of `tensor` (3x4) with its transpose (4x3), written with the `@` operator.
# The next two cells compute the same product as y2 and y3 using other spellings.
print(tensor.shape)
y1 = tensor @ tensor.T
print(y1)

torch.Size([3, 4])
tensor([[ 14.,  38.,  62.],
        [ 38., 126., 214.],
        [ 62., 214., 366.]])


In [32]:
# Same matrix product, this time through the `matmul` tensor method.
y2 = tensor.matmul(tensor.T)
print(y2)

tensor([[ 14.,  38.,  62.],
        [ 38., 126., 214.],
        [ 62., 214., 366.]])


In [35]:
# Third variant: write the product into a preallocated tensor through `out=`.
# The ones in y3 are only placeholders; the printed result shows they are all overwritten.
y3 = torch.ones(3,3)
torch.matmul(tensor, tensor.T, out=y3)
print(y3)

tensor([[ 14.,  38.,  62.],
        [ 38., 126., 214.],
        [ 62., 214., 366.]])


In [38]:
# Element-wise check that all three products agree. The two masks are combined with
# `&`; comparing them with `==` would also report True wherever both comparisons fail.
(y1 == y2) & (y2 == y3)

tensor([[True, True, True],
        [True, True, True],
        [True, True, True]])

In [39]:
# Element-wise product via the `*` operator: every entry is multiplied by itself.
z1 = tensor * tensor

print(z1)

tensor([[  0.,   1.,   4.,   9.],
        [ 16.,  25.,  36.,  49.],
        [ 64.,  81., 100., 121.]])


In [41]:
# Same element-wise product through the `mul` tensor method.
z2 = tensor.mul(tensor)
print(z2)

tensor([[  0.,   1.,   4.,   9.],
        [ 16.,  25.,  36.,  49.],
        [ 64.,  81., 100., 121.]])


In [42]:
# Same element-wise product written into a preallocated 3x4 tensor through `out=`.
z3 = torch.ones(3,4)
torch.mul(tensor, tensor, out=z3)
print(z3)

tensor([[  0.,   1.,   4.,   9.],
        [ 16.,  25.,  36.,  49.],
        [ 64.,  81., 100., 121.]])


In [45]:
# Sum all entries, then convert the result to a plain Python number.
agg = tensor.sum()
agg_item = agg.item()  # the printed type below confirms a built-in float
print(agg_item, type(agg_item))

66.0 <class 'float'>


### 1.4 GPU Acceleration

If we have NVIDIA GPU(s), we can accelerate computation by moving tensors onto the GPU.
Let's see how much a GPU speeds things up, especially for matrix operations.
We will do a matrix-matrix multiplication between two 5k-by-5k matrices on both CPU and GPU.

Unfortunately, Coursera does not have a GPU environment. But feel free to try the following snippets on a GPU machine. Ideally, with GPU acceleration, matrix multiplication will be much faster.

### `mul` is element-wise multiplication (Hadamard product), not a dot product

In [46]:
# 5000x5000 random matrix used by the timing cells below.
mat = torch.rand(5000, 5000)

In [55]:
%%time
# Element-wise product of the 5000x5000 matrix with itself; only the shape is printed.
print(torch.mul(mat, mat).shape)

torch.Size([5000, 5000])
CPU times: total: 312 ms
Wall time: 50.8 ms


In [59]:
%%time
# Full matrix product mat @ mat.T; the recorded wall time is far longer than the element-wise case.
print(torch.matmul(mat, mat.T).shape)

torch.Size([5000, 5000])
CPU times: total: 20.5 s
Wall time: 2.69 s


In [60]:
%%time
# Same product through torch.mm; the recorded timing is close to torch.matmul's.
print(torch.mm(mat, mat.T).shape)

torch.Size([5000, 5000])
CPU times: total: 20.1 s
Wall time: 2.67 s


In [66]:
%%time
if torch.cuda.is_available():
    # Use a separate name for the GPU copy so `mat` stays on the CPU; otherwise
    # re-running the CPU timing cells above would silently time the GPU instead.
    mat_gpu = mat.cuda()
    torch.mm(mat_gpu, mat_gpu.T)
    # CUDA kernels are launched asynchronously: wait for them to finish so the
    # cell timing covers the multiplication itself, not just the kernel launch.
    # Note that the timed region also includes the host-to-device copy above.
    torch.cuda.synchronize()
else:
    print('gpu n/a')

CPU times: total: 0 ns
Wall time: 0 ns


### Exercise 1 [10 points]

Implement the Sigmoid function on your own.

$$\sigma(x) = \frac{1}{1 + \exp(-x)}$$

Note that you should not use the existing PyTorch implementation (e.g. `torch.sigmoid`).

Hint: try `torch.exp()`.

In [67]:
def sigmoid(x):
    """Element-wise logistic sigmoid: 1 / (1 + exp(-x)).

    Args:
        x (torch.Tensor): input tensor of any shape.

    Returns:
        torch.Tensor: tensor with the same shape as ``x`` whose entries lie
        between 0 and 1.
    """
    # For very negative entries exp(-x) overflows to inf and the result is 0;
    # for very positive entries exp(-x) underflows to 0 and the result is 1.
    return 1 / (1 + torch.exp(-x))