In [1]:
import torch
import numpy as np

# torch.Tensor

## Initialize with Python lists

In [6]:
arr = [[1, 3], [2, 0]]

# Numpy
arr_n = np.array(arr)
print(type(arr_n))
print(arr_n)

# PyTorch
arr_t = torch.Tensor(arr)
print(type(arr_t))
print(arr_t)

<class 'numpy.ndarray'>
[[1 3]
 [2 0]]
<class 'torch.Tensor'>
tensor([[1., 3.],
        [2., 0.]])


## Initialization: ones & zeros

In [4]:
# Numpy
print(np.ones((2, 3)))
print(np.zeros((2, 3)))

# PyTorch
print(torch.ones(2, 3))
print(torch.zeros((2, 3)))

[[1. 1. 1.]
 [1. 1. 1.]]
[[0. 0. 0.]
 [0. 0. 0.]]
tensor([[1., 1., 1.],
        [1., 1., 1.]])
tensor([[0., 0., 0.],
        [0., 0., 0.]])


## Initialization: ones_like & zeros_like
Return an array of ones/zeros with the same shape and type as a given array.

In [7]:
# Numpy
print(np.ones_like(arr_n))
print(np.zeros_like(arr_n))

# PyTorch
print(torch.ones_like(arr_t))
print(torch.ones_like(arr_t))

[[1 1]
 [1 1]]
[[0 0]
 [0 0]]
tensor([[1., 1.],
        [1., 1.]])
tensor([[1., 1.],
        [1., 1.]])


## Two ways of specifying data type
type : https://pytorch.org/docs/stable/tensors.html

In [8]:
# 1. Use Keyword argument dtype
print(torch.ones((2, 3), dtype = torch.int))
print(torch.ones((2, 3), dtype = torch.float))

tensor([[1, 1, 1],
        [1, 1, 1]], dtype=torch.int32)
tensor([[1., 1., 1.],
        [1., 1., 1.]])


In [9]:
# 2. Use typed tensors
ft = torch.FloatTensor([1, 2])
print(ft)
print(ft.dtype)

tensor([1., 2.])
torch.float32


# Tensor operations

## Accessing elements

In [12]:
arr_t = torch.Tensor([[1, 2], [2, 3]])
print(arr_t[0, 1]) # Similar to NumPy. But, it always returns Tensor
print(arr_t[0, 1].item()) # Get a Python number

tensor(2.)
2.0


## Updating elements

In [13]:
arr_t[0, 1] = 0
print(arr_t)

tensor([[1., 0.],
        [2., 3.]])


## Slicing

[strat: end]
- Start (inclusive), end (exclusive)
- Default values (start: 0, end: length)

In [14]:
t = torch.Tensor([[1, 2, 3, 4], [2, 3, 4, 5], [5, 6, 7, 8]])
print(t)

tensor([[1., 2., 3., 4.],
        [2., 3., 4., 5.],
        [5., 6., 7., 8.]])


In [15]:
print(t[:2])
print(t[1: , 1:3])
print(t[:, 1:])

t[1:, 1:3] = 0
print(t)

tensor([[1., 2., 3., 4.],
        [2., 3., 4., 5.]])
tensor([[3., 4.],
        [6., 7.]])
tensor([[2., 3., 4.],
        [3., 4., 5.],
        [6., 7., 8.]])
tensor([[1., 2., 3., 4.],
        [2., 0., 0., 5.],
        [5., 0., 0., 8.]])


## Negative slicing
- Use minus operator to refer to an index from the end
|Index|0|1|2|...|N-2|N-1|
|-----|-|-|-|---|---|---|
|Negative index|N-1|N-2|N-3|...|-2|-1|

In [16]:
print(t[:, :-1])
print(t[:, -3:-1])

tensor([[1., 2., 3.],
        [2., 0., 0.],
        [5., 0., 0.]])
tensor([[2., 3.],
        [0., 0.],
        [0., 0.]])


## Shape & Transpose (matrix)

In [19]:
X = torch.Tensor([[1, 2, 3], [4, 5, 6]])
print(X)
print(X.T)

tensor([[1., 2., 3.],
        [4., 5., 6.]])
tensor([[1., 4.],
        [2., 5.],
        [3., 6.]])


In [20]:
print(X.shape)
print(X.T.shape)

torch.Size([2, 3])
torch.Size([3, 2])


## Sum
- `torch.sum(X)` = `X.sum()`

* `torch.sum(input, dim, keepdim = False, *, dtype = None)` -> Tensor
    - Parametes
        + **input** (Tensor)
        + **dim** (int or tuple, optional) : the dimensions to reduce
    - Keyword arguments
        + **dtype** (torch.dtype, optional) : desired datatype (default: None)
        + **keepdim** (bool) : wheater the output tensor has dim retained or not
        eg) 2\*3 dimension --sum--> (True: 1\*3, False: 1)

In [25]:
print(X)

print(X.sum())
print(torch.sum(X))

print(X.sum(0))
print(X.sum(0, keepdim = True))

print(X.sum(1))
print(X.sum(1, keepdim = True))

tensor([[1., 2., 3.],
        [4., 5., 6.]])
tensor(21.)
tensor(21.)
tensor([5., 7., 9.])
tensor([[5., 7., 9.]])
tensor([ 6., 15.])
tensor([[ 6.],
        [15.]])


## Mean
- `torch.mean(input, dim, keepdim = False, *)` -> Tensor
    - Parametes
        + **input** (Tensor)
        + **dim** (int or tuple, optional) : the dimensions to reduce

In [26]:
print(X)

print(X.mean())
print(X.mean(0))
print(X.mean(1))

tensor([[1., 2., 3.],
        [4., 5., 6.]])
tensor(3.5000)
tensor([2.5000, 3.5000, 4.5000])
tensor([2., 5.])


## Max
- `torch.max(input, dim, keepdim = False, *)` -> Tensor
    - Parametes
        + **input** (Tensor)
        + **dim** (int or tuple, optional) : the dimensions to reduce
    - Output
        + **out** (Tensor, if dim is specified) : the input tensor(max, max_indices)

In [29]:
X[0, 1] = 7
print(X)

print(X.max())
print(X.max(0))
print(X.max(1))

tensor([[1., 7., 3.],
        [4., 5., 6.]])
tensor(7.)
torch.return_types.max(
values=tensor([4., 7., 6.]),
indices=tensor([1, 0, 1]))
torch.return_types.max(
values=tensor([7., 6.]),
indices=tensor([1, 2]))


## Binary Operators
- Addition $Z=X+Y$
- Element-wise multiplication $z_{ij} = x_{ij}*y_{ij}$
- Matrix multiplication

In [30]:
X = torch.Tensor([[1, 2, 3], [4, 5, 6]])
Y = torch.Tensor([[1, 0, 2], [1, 0, 1]])

print(X+Y)
print(X*Y)
print(torch.matmul(X.T, Y))
print(torch.matmul(X, Y.T))

tensor([[2., 2., 5.],
        [5., 5., 7.]])
tensor([[1., 0., 6.],
        [4., 0., 6.]])
tensor([[ 5.,  0.,  6.],
        [ 7.,  0.,  9.],
        [ 9.,  0., 12.]])
tensor([[ 7.,  4.],
        [16., 10.]])


## Inner product

In [32]:
print(torch.inner(X, Y))

tensor([[ 7.,  4.],
        [16., 10.]])


## View
Returns a tensor with te same data ans number of elements as self but with the specified shape

* `X.view(*shape)`
    - **shape** (int or tuple): the desired shape

In [42]:
X = torch.Tensor([[[1, 2, 3, 4], [2, 3, 4, 5], [5, 6, 7, 8]], [[1, 2, 3, 4], [2, 3, 4, 5], [5, 6, 7, 8]]])
print(X.shape)
print(X)

Y = X.view(3, 2, 4)
print(Y.shape)
print(Y)

Y = X.view(6, 4)
print(Y.shape)
print(Y)

torch.Size([2, 3, 4])
tensor([[[1., 2., 3., 4.],
         [2., 3., 4., 5.],
         [5., 6., 7., 8.]],

        [[1., 2., 3., 4.],
         [2., 3., 4., 5.],
         [5., 6., 7., 8.]]])
torch.Size([3, 2, 4])
tensor([[[1., 2., 3., 4.],
         [2., 3., 4., 5.]],

        [[5., 6., 7., 8.],
         [1., 2., 3., 4.]],

        [[2., 3., 4., 5.],
         [5., 6., 7., 8.]]])
torch.Size([6, 4])
tensor([[1., 2., 3., 4.],
        [2., 3., 4., 5.],
        [5., 6., 7., 8.],
        [1., 2., 3., 4.],
        [2., 3., 4., 5.],
        [5., 6., 7., 8.]])


## Squeeze / Unsqueeze

In [49]:
X = torch.Tensor([[[1, 2, 3, 4], [2, 3, 4, 5], [5, 6, 7, 8]]])
print(X, X.shape)
Y = X.squeeze(dim = 0)
print(Y, Y.shape)
Y = Y.unsqueeze(dim = 0)
print(Y, Y.shape)

tensor([[[1., 2., 3., 4.],
         [2., 3., 4., 5.],
         [5., 6., 7., 8.]]]) torch.Size([1, 3, 4])
tensor([[1., 2., 3., 4.],
        [2., 3., 4., 5.],
        [5., 6., 7., 8.]]) torch.Size([3, 4])
tensor([[[1., 2., 3., 4.],
         [2., 3., 4., 5.],
         [5., 6., 7., 8.]]]) torch.Size([1, 3, 4])


## Broadcasting
automatically expanded to be of equal sizes

In [50]:
X = torch.FloatTensor([[1, 2, 3], [4, 5, 6]])
print(X)

Y = torch.FloatTensor([1, 2])
print(Y)

tensor([[1., 2., 3.],
        [4., 5., 6.]])
tensor([1., 2.])


In [59]:
print(X+1)
print(X.T)
print(X.T + Y) # Y.T + X -> (X)

tensor([[2., 3., 4.],
        [5., 6., 7.]])
tensor([[1., 4.],
        [2., 5.],
        [3., 6.]])
tensor([[2., 6.],
        [3., 7.],
        [4., 8.]])


- **broadcastable** : starting at the trailing dimension, the dimension sizes must either be
    1. one of them is 1
    2. one of them does not exist

In [60]:
X = torch.FloatTensor([[[1, 2, 3], [4, 5, 6], [7, 8, 9]], [[1, 2, 3], [4, 5, 6], [7, 8, 9]]])
print(X, X.shape)

tensor([[[1., 2., 3.],
         [4., 5., 6.],
         [7., 8., 9.]],

        [[1., 2., 3.],
         [4., 5., 6.],
         [7., 8., 9.]]]) torch.Size([2, 3, 3])


In [62]:
Y = torch.ones((1, 1, 3))
print(Y + X)

Y = torch.ones((1, 3))
print(Y + X)
# (0, 1, 3) - (2, 3, 3) :
# (1),(2),(3). (3) : ok, (2) : ok, (1) : not exist -> ok.

Y = torch.ones(3)
print(Y + X)
# (0, 0, 3) - (2, 3, 3) :
# (1),(2),(3). (3) : ok, (2) : not exist, (1) : not exist -> ok.

tensor([[[ 2.,  3.,  4.],
         [ 5.,  6.,  7.],
         [ 8.,  9., 10.]],

        [[ 2.,  3.,  4.],
         [ 5.,  6.,  7.],
         [ 8.,  9., 10.]]])
tensor([[[ 2.,  3.,  4.],
         [ 5.,  6.,  7.],
         [ 8.,  9., 10.]],

        [[ 2.,  3.,  4.],
         [ 5.,  6.,  7.],
         [ 8.,  9., 10.]]])
tensor([[[ 2.,  3.,  4.],
         [ 5.,  6.,  7.],
         [ 8.,  9., 10.]],

        [[ 2.,  3.,  4.],
         [ 5.,  6.,  7.],
         [ 8.,  9., 10.]]])


## ndarray ↔ tensor

In [64]:
a = X.numpy()
print(type(a))
t = torch.from_numpy(a)
print(type(t))

<class 'numpy.ndarray'>
<class 'torch.Tensor'>


# Autograd

## Example
### 1. Initialize

In [87]:
w = torch.randn(2, requires_grad = True)
x = torch.Tensor([1, 2])

### 2. Predict output

In [88]:
y_hat = torch.inner(w, x)

### 2-2. Intermediate results

In [89]:
print(x)
print(w)
print(y_hat)

tensor([1., 2.])
tensor([-0.3628, -0.8129], requires_grad=True)
tensor(-1.9887, grad_fn=<ReshapeAliasBackward0>)


### 3. Compute loss

In [90]:
loss = (x.mean() - y_hat)**2
print(loss)

tensor(12.1709, grad_fn=<PowBackward0>)


### 4. Backpropagation

In [91]:
loss.backward()

### 4-2. Accessing the gradient

In [92]:
w.grad

tensor([ -6.9774, -13.9547])

### 5. Update parameters

In [93]:
lr = 0.1
with torch.no_grad():
    w = w - lr * w.grad
    print(w.grad)
print(w.requires_grad)
w.requires_grad = True
print(w.grad)

None
False
None


## Avoiding in-place operations

In [101]:
# 1. A = A + X
# 2.
mask = torch.ones_like(t)
mask[1:, :] = 0
print(mask)
t = t*mask
print(t)

tensor([[[1., 1., 1.],
         [1., 1., 1.],
         [1., 1., 1.]],

        [[0., 0., 0.],
         [0., 0., 0.],
         [0., 0., 0.]]])
tensor([[[1., 2., 3.],
         [4., 5., 6.],
         [7., 8., 9.]],

        [[0., 0., 0.],
         [0., 0., 0.],
         [0., 0., 0.]]])


# Implement a Shallow NN with PyTorch autograd

## Data preparation & Import

In [102]:
import numpy as np
import torch

### XOR data (numpy)

In [118]:
x_seeds = np.array([(0, 0), (1, 0), (0, 1), (1, 1)], dtype = np.float32)
y_seeds = np.array([0, 1, 1, 0])

N = 1000
idxs = np.random.randint(0, 4, N)

X = x_seeds[idxs]
Y = y_seeds[idxs]

X += np.random.normal(scale = 0.25, size = X.shape)

## Model

### Model (torch)

In [119]:
class shallow_neural_network():
    def __init__(self, num_input_features, num_hiddens):
        self.num_input_features = num_input_features
        self.num_hiddens = num_hiddens
        
        self.W1 = torch.randn((num_hiddens, num_input_features), requires_grad = True)
        self.b1 = torch.randn(num_hiddens, requires_grad = True)
        self.W2 = torch.randn(num_hiddens, requires_grad = True)
        self.b2 = torch.randn(1, requires_grad = True)
        
        self.tanh = torch.nn.Tanh()
        self.sigmoid = torch.nn.Sigmoid()
        
    def predict(self, x):
        z1 = torch.matmul(self.W1, x) + self.b1
        a1 = self.tanh(z1)
        z2 = torch.matmul(self.W2, a1) + self.b2
        a2 = self.sigmoid(z2)
        return a2

## Training

In [120]:
def train(X, Y, model, lr = 0.1):
    m = len(X)
    
    cost = 0.0
    for x, y in zip(X, Y):
        x_torch = torch.from_numpy(x)
        
        a2 = model.predict(x_torch)
        if y == 1:
            loss = -torch.log(a2+0.0001)
        else:
            loss = -torch.log(1.0001-a2)
        
        loss.backward()
        cost += loss.item()
        
    with torch.no_grad():
        model.W1 -= lr * model.W1.grad / m
        model.b1 -= lr * model.b1.grad / m
        model.W2 -= lr * model.W2.grad / m
        model.b2 -= lr * model.b2.grad / m
        
    model.W1.requires_grad = True
    model.b1.requires_grad = True
    model.W2.requires_grad = True
    model.b2.requires_grad = True
    
    return cost/m

In [122]:
model = shallow_neural_network(2, 3)

for epoch in range(100):
    cost = train(X, Y, model, 1.0)
    if epoch % 10 == 0:
        print(epoch, cost)

0 0.7527297248840332
10 0.677710083603859
20 0.538378561925143
30 0.3059057673345925
40 0.3551357912369235
50 0.37402007443872093
60 0.3429546073620909
70 0.3608650089541818
80 0.38654966324056045
90 0.39258272700098906


## Testing

In [123]:
print(model.predict(torch.Tensor((0, 0))))
print(model.predict(torch.Tensor((0, 1))))
print(model.predict(torch.Tensor((1, 0))))
print(model.predict(torch.Tensor((1, 1))))

tensor([4.5543e-18], grad_fn=<SigmoidBackward0>)
tensor([0.8421], grad_fn=<SigmoidBackward0>)
tensor([0.7720], grad_fn=<SigmoidBackward0>)
tensor([7.4709e-23], grad_fn=<SigmoidBackward0>)


# nn.Module

## A simple custom module

In [124]:
import torch
from torch import nn

class MyLinear(nn.Module):
    def __init__(self, in_features, out_features):
        super().__init__()
        self.weight = nn.Parameter(torch.randn(in_features, out_features))
        self.bias = nn.Parameter(torch.randn(out_features))
        
    def forward(self, input):
        return (input @ self.weight) + self.bias
    
m = MyLinear(4, 3)
sample_input = torch.randn(4)
m(sample_input)

tensor([-1.0721,  3.5980, -1.3363], grad_fn=<AddBackward0>)

## Modules as Building Blocks

In [126]:
import torch.nn.functional as F

class Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.l0 = nn.Linear(4, 3)
        self.l1 = nn.Linear(3, 1)
    def forward(self, x):
        x = self.l0(x)
        x = F.relu(x)
        x = self.l1(x)
        return x

# Implementing a Shallow NN with autograd and nn.Module

In [156]:
import numpy as np

x_seeds = np.array([(0, 0), (1, 0), (0, 1), (1, 1)], dtype = np.float32)
y_seeds = np.array([0, 1, 1, 0])

N = 1000
idxs = np.random.randint(0, 4, N)

X = x_seeds[idxs]
Y = y_seeds[idxs]

X += np.random.normal(scale = 0.25, size = X.shape)

## Model (torch.nn.Module)

In [148]:
class shallow_neural_network_nnm(nn.Module):
    def __init__(self, num_input_features, num_hiddens):
        super().__init__()
        self.num_input_features = num_input_features
        self.num_hiddens = num_hiddens
        
        self.linear1 = nn.Linear(num_input_features, num_hiddens)
        self.linear2 = nn.Linear(num_hiddens, 1)
        
        self.tanh = torch.nn.Tanh()
        self.sigmoid = torch.nn.Sigmoid()
        
    def forward(self, x):
        z1 = self.linear1(x)
        a1 = self.tanh(z1)
        z2 = self.linear2(a1)
        a2 = self.sigmoid(z2)
        return a2

## Training

In [155]:
num_epochs = 100
lr_nnm = 1.0
num_hiddens = 3

model_nnm = shallow_neural_network_nnm(2, num_hiddens)
optimizer_nnm = torch.optim.SGD(model_nnm.parameters(), lr = lr_nnm)
loss_nnm = nn.BCELoss()

for epoch in range(num_epochs):
    optimizer_nnm.zero_grad()
    
    cost = 0.0
    for x, y in zip(X, Y):
        x_torch = torch.from_numpy(x)
        y_torch = torch.FloatTensor([y])
        
        y_hat = model_nnm(x_torch)
        
        loss_val = loss_nnm(y_hat, y_torch)
        cost += loss_val
        
    cost = cost / len(x)
    cost.backward()
    optimizer_nnm.step()
    
    if epoch %10 == 0:
        print(epoch, cost)

0 tensor(347.3601, grad_fn=<DivBackward0>)
10 tensor(23550., grad_fn=<DivBackward0>)
20 tensor(23550., grad_fn=<DivBackward0>)
30 tensor(23550., grad_fn=<DivBackward0>)
40 tensor(23550., grad_fn=<DivBackward0>)
50 tensor(23550., grad_fn=<DivBackward0>)
60 tensor(23550., grad_fn=<DivBackward0>)
70 tensor(23550., grad_fn=<DivBackward0>)
80 tensor(23550., grad_fn=<DivBackward0>)
90 tensor(23550., grad_fn=<DivBackward0>)


## Test

In [145]:
for x, y in zip(x_seeds, y_seeds):
    print(x)
    x_torch = torch.FloatTensor(x)
    y_hat = model(x_torch)
    print(y, y_hat.item())

[0. 0.]
0 1.0
[1. 0.]
1 1.0
[0. 1.]
1 1.0
[1. 1.]
0 1.0
