# Import libraries

In [None]:
# -*- coding: utf-8 -*-
import numpy as np
import math
import matplotlib.pyplot as plt
import torch

# What is PyTorch

## Warm-up: numpy

**Goal:** Let's start with the problem of fitting $y = \sin(x)$ with a third-order polynomial.

Being a third order polynomial means: $$y = ax^3 + bx^2 + cx + d$$


In [None]:
# Dataset: 2000 evenly spaced points covering one full period of the sine wave
x = np.linspace(start=-math.pi, stop=math.pi, num=2000)
y = np.sin(x)

# Plot the target curve the polynomial will be fitted to
plt.plot(x, y)

### Model

#### Define functions

In [None]:
def forward(x, a, b, c, d):
    """Evaluate the third-order polynomial for the given input.

        y_pred = a * x^3 + b * x^2 + c * x + d

    Args:
        x: Input value(s); a scalar or an array evaluated element-wise.
        a, b, c, d: Polynomial coefficients, highest order first.

    Returns:
        y_pred: The polynomial evaluated at x (same shape as x).
    """
    # Begin code here
    y_pred = a * x ** 3 + b * x ** 2 + c * x + d
    # End code
    return y_pred

In [None]:
def compute_loss(y, y_pred):
    """Mean squared error (MSE) between targets and predictions.

        loss = mean((y_pred - y)^2)

    Args:
        y: Target values.
        y_pred: Predicted values, same shape as y.

    Returns:
        loss: The mean of the squared element-wise differences.
    """
    # Begin code here
    loss = np.mean((y_pred - y) ** 2)
    # End code
    return loss

In [None]:
def backward(x, y, y_pred):
    """Compute the gradients of the MSE loss w.r.t. the weights a, b, c, d.

    With loss = mean((y_pred - y)^2) and y_pred = a*x^3 + b*x^2 + c*x + d:

        grad_a = mean(2 * (y_pred - y) * x^3)
        grad_b = mean(2 * (y_pred - y) * x^2)
        grad_c = mean(2 * (y_pred - y) * x)
        grad_d = mean(2 * (y_pred - y))

    Args:
        x: Input values the predictions were computed from.
        y: Target values.
        y_pred: Predicted values, same shape as y.

    Returns:
        Tuple (grad_a, grad_b, grad_c, grad_d).
    """
    # Begin code here
    # Derivative of the squared error w.r.t. each prediction; the mean over
    # samples is applied per weight below (chain rule through y_pred).
    grad_y_pred = 2.0 * (y_pred - y)
    grad_a = np.mean(grad_y_pred * x ** 3)
    grad_b = np.mean(grad_y_pred * x ** 2)
    grad_c = np.mean(grad_y_pred * x)
    grad_d = np.mean(grad_y_pred)
    # End code
    return grad_a, grad_b, grad_c, grad_d

#### Train model

In [None]:
# Initialize weights from a standard normal distribution.
# A seeded generator keeps the starting point, and therefore the whole
# training run, reproducible across "Restart & Run All".
SEED = 42
rng = np.random.default_rng(SEED)

a = rng.standard_normal()
b = rng.standard_normal()
c = rng.standard_normal()
d = rng.standard_normal()

# Step size used for every gradient-descent update
learning_rate = 0.001

print([a, b, c, d])

In [None]:
# Gradient descent: forward pass, loss, backward pass, weight update
for epoch in range(1000):
    # Forward pass: predictions for the current weights
    y_pred = forward(x, a, b, c, d)

    # Loss for this epoch, reported once every 100 epochs
    loss = compute_loss(y, y_pred)
    if (epoch + 1) % 100 == 0:
        print(f'Epoch: {epoch}, loss: {loss}')

    # Backward pass: one gradient per weight
    grad_a, grad_b, grad_c, grad_d = backward(x, y, y_pred)

    # Move every weight one step against its gradient
    a, b, c, d = (
        a - learning_rate * grad_a,
        b - learning_rate * grad_b,
        c - learning_rate * grad_c,
        d - learning_rate * grad_d,
    )

print(f'Result: y = {a}x^3 + {b}x^2 + {c}x + {d}')

In [None]:
# Evaluate the polynomial with the final weights and plot it against the input
y_pred = forward(x=x, a=a, b=b, c=c, d=d)
plt.plot(x, y_pred)

# PyTorch Tensors
Tensors are the central data abstraction in PyTorch.

## Creating Tensors

There are many ways to create a Tensor:
- `torch.empty()`
- `torch.zeros()`
- `torch.ones()`
- `torch.rand()`
- `torch.tensor()`

In [None]:
# Create a 3x4 tensor. torch.empty() only allocates memory, so the printed
# values are whatever happened to be there.
x = torch.empty((3, 4))  # <-- You can change to other functions to see how it works!

# Show the Python type, the contents and the element data type
print(type(x), x, x.dtype, sep='\n')

A brief note about tensors and their number of dimensions, and terminology:
* You will sometimes see a 1-dimensional tensor called a *vector.*
* Likewise, a 2-dimensional tensor is often referred to as a *matrix.*
* Anything with more than two dimensions is generally just called a tensor.

In [None]:
# torch.tensor() copies the data it is given, whether that is a nested
# Python list ...
my_numpy = np.array([[2, 10], [20, 3]])
my_tensor = torch.tensor([[2, 10], [20, 3]])

# ... or an existing NumPy array
my_tensor = torch.tensor(my_numpy)

## Data Types
`dtype` argument

In [None]:
# The dtype can be chosen when the tensor is created
my_tensor = torch.ones(2, 3, dtype=torch.int32)
print(my_tensor)

In [None]:
# An existing tensor can also be converted to another dtype
my_tensor = my_tensor.bool()
print(my_tensor)

## Shapes

In [None]:
# Show my_tensor followed by its shape
print(my_tensor, my_tensor.size(), sep='\n')

We can create a new tensor that has the same shape as an existing one by calling the `torch.*_like()` functions.

In [None]:
def _show(title, tensor):
    """Print a title line, then the tensor and its shape."""
    print(title)
    print(tensor)
    print(tensor.shape)


# Base tensor whose shape the *_like() constructors copy
my_tensor = torch.empty(3, 3, 3)
_show('Base tensor:', my_tensor)

# Same shape, uninitialized contents
empty_like_tensor = torch.empty_like(my_tensor)
_show('\nEmpty tensor:', empty_like_tensor)

# Same shape, filled with zeros
zeros_like_tensor = torch.zeros_like(my_tensor)
_show('\nZeros tensor:', zeros_like_tensor)

# Same shape, filled with uniform random values
rand_like_tensor = torch.rand_like(my_tensor)
_show('\nRand tensor:', rand_like_tensor)

## Math & Logic with PyTorch Tensors

### Basic arithmetic

Let's start with basic operation: **Tensor with Scalar**

In [None]:
# Arithmetic between a tensor and a scalar is applied to every element
zeros = torch.zeros((3, 3))
ones = zeros + 1
twos = ones * 2

# Print each tensor under a heading describing how it was built
for heading, tensor in (
    ('zeros:', zeros),
    ('\nones = zeros + 1:', ones),
    ('\ntwos = ones * 2', twos),
):
    print(heading)
    print(tensor)

As you can see above, arithmetic operations between tensors and scalars, such as addition, subtraction, multiplication, division, and exponentiation are **distributed over every element** of the tensor.

Because the output of such operation will also be a tensor, we can chain them together with the usual operator precedence rules:

In [None]:
# Several element-wise operations chained in one expression;
# the usual operator precedence applies
fours = ((2 * ones + 3) ** 2 - 5) / 5
print(fours)

Similar operations between two tensors (**Tensor with Tensor**) behave the way you'd intuitively expect:

In [None]:
# Element-wise addition of three tensors with the same shape
threes = zeros + ones + twos
print('threes = zeros + ones + twos:', threes, sep='\n')

# Element-wise multiplication of two tensors with the same shape
dozens = threes * fours
print('\ndozens = threes * fours:', dozens, sep='\n')

In general, binary operations must be performed on tensors of the same shape.

**Note: The following cell throws a run-time error. This is intentional.**


In [None]:
tensor2_3 = torch.rand(2, 3)
tensor3_2 = torch.rand(3, 2)

# Element-wise multiplication of a (2, 3) and a (3, 2) tensor raises a
# RuntimeError because the shapes do not match. The error is the point of
# this cell, so it is caught and displayed instead of aborting "Run All".
try:
    print(tensor2_3 * tensor3_2)
except RuntimeError as err:
    print(f'RuntimeError: {err}')

#### In Brief: Tensor Broadcasting

The exception to the same-shapes rule is *tensor broadcasting*.

Let's see an example:

In [None]:
# A (2, 4) tensor and a (1, 4) tensor: the size-1 dimension is broadcast
tensor2_4 = torch.ones((2, 4))
tensor1_4 = torch.ones((1, 4))

print(tensor2_4, tensor1_4, sep='\n')

print()

# The single row of tensor1_4 is added to every row of tensor2_4
print('tensor2_4 + tensor1_4:', tensor2_4 + tensor1_4, sep='\n')

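Broadcasting compares the two shapes dimension by dimension, from last to first. Two tensors can be broadcast when, for every pair of dimensions, the sizes are equal, one of them is 1, or the dimension does not exist in one of the tensors. The following examples honor these rules and allow broadcasting: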

In [None]:
base_tensor = torch.ones(3, 4, 5, 6)

# Shapes are compared from the last dimension to the first.

# Every dimension is equal
a = base_tensor * torch.rand(3, 4, 5, 6)

# One of the dimensions has size 1
b = base_tensor * torch.rand(3, 1, 5, 6)

# The leading dimension does not exist in one of the tensors
c = base_tensor * torch.rand(4, 5, 6)

# All of the rules combined
d = base_tensor * torch.rand(4, 1, 6)

# Each result is broadcast to the shape of base_tensor
for product in (a, b, c, d):
    print(product)

The following examples are not broadcast-able, and **will throw run-time error. This is intentional.**

In [None]:
base_tensor = torch.ones(4, 3, 2)

# Shapes that cannot be broadcast against (4, 3, 2). Dimensions are compared
# last-to-first, and each of these already fails on the last dimension.
incompatible_shapes = [
    (4, 3),  # last dimensions: 2 vs 3
    (2, 3),  # last dimensions: 2 vs 3
    (0,),    # empty tensor: 2 vs 0
]

# Try every shape on its own and display the error. Without the try/except
# the first failure would stop the cell, so the remaining examples would
# never run and "Run All" would abort here.
for shape in incompatible_shapes:
    try:
        base_tensor * torch.rand(*shape)
    except RuntimeError as err:
        print(f'{tuple(base_tensor.shape)} * {shape} -> RuntimeError: {err}')
    else:
        print(f'{tuple(base_tensor.shape)} * {shape} -> unexpectedly succeeded')

### More Math with Tensors

#### Common Functions

In [None]:
# Uniform random values rescaled from [0, 1) to [-1, 1)
a = 2 * torch.rand(2, 4) - 1
print('Common functions:')
print(a.abs())                    # absolute value
print(a.ceil())                   # round up
print(torch.floor(a))             # round down
print(torch.clamp(a, -0.5, 0.5))  # limit values to [-0.5, 0.5]

#### Trigonometric functions and their inverses

In [None]:
# Four angles between 0 and 3*pi/4, in radians
angles = torch.tensor([0, math.pi / 4, math.pi / 2, 3 * math.pi / 4])

sines = torch.sin(angles)
# asin returns values in [-pi/2, pi/2], so the last entry comes back as
# pi/4 rather than the original 3*pi/4.
inverses = torch.asin(sines)
print('Sine and arcsine:')
print(angles)
print(sines)
print(inverses)

#### Bitwise operations

In [None]:
# Two integer tensors to combine bit by bit
b = torch.tensor([1, 5, 11])
c = torch.tensor([2, 7, 10])

print('Bitwise XOR:')
print(b ^ c)  # the ^ operator is the element-wise bitwise XOR

#### Comparisons:

In [None]:
d = torch.tensor([[1., 2.], [3., 4.]])
e = torch.ones((1, 2))  # many comparison ops support broadcasting!

# Element-wise equality; e is broadcast across both rows of d
print(d == e)

#### Reductions

In [None]:
print('Reduction ops:')

# Largest element, first as a tensor and then as a plain Python number
largest = d.max()
print(largest)
print(largest.item())

print(d.mean())       # average of all elements
print(d.std())        # standard deviation of all elements
print(torch.prod(d))  # product of all elements
print(torch.tensor([1, 2, 1, 2, 1, 3]).unique())  # distinct values only

#### Vector and linear algebra operations

In [None]:
v1 = torch.tensor([1, 0, 0])                 # x unit vector
v2 = torch.tensor([0, 1, 0])                 # y unit vector
m1 = torch.rand(2, 2)                        # random matrix
m2 = torch.tensor([[3.0, 0.0], [0.0, 3.0]])  # three times the identity matrix

print('Vector & Matrices')
# Cross product y x x = -z. `dim` is passed explicitly because calling
# torch.cross without it is deprecated.
print(torch.cross(v2, v1, dim=0))
print(m1)
m3 = torch.matmul(m1, m2)  # equals 3 * m1
print(m3)
# Singular value decomposition. torch.linalg.svd replaces the deprecated
# torch.svd; it returns (U, S, Vh), where Vh is the transpose of the V that
# torch.svd returned. full_matrices=False keeps the reduced form torch.svd
# produced by default.
print(torch.linalg.svd(m3, full_matrices=False))

## Moving to GPU