In [None]:
import torch
import numpy as np
import torch.nn as nn
# Everything in pytorch is based on Tensor operations.
# A tensor can have different dimensions
# so it can be 1d, 2d, or even 3d and higher


In [None]:
# torch.empty(size) / np.empty(size) allocate memory without initializing it,
# so the printed values are whatever happened to be in that memory
for shape in [(1,), (3,), (2, 3)]:  # one element, 1-D vector, 2-D matrix
    x = torch.empty(shape)
    y = np.empty(shape)
    print(x)
    print(y)

# three sizes give a 3-D tensor; torch.empty(2, 2, 2, 3) would be 4-D
x = torch.empty(2, 2, 3)
print(x)

tensor([1.1531e-35])
[9.]
tensor([1.1531e-35, 0.0000e+00, 5.0447e-44])
[0.75 0.75 0.  ]
tensor([[1.1531e-35, 0.0000e+00, 4.2039e-45],
        [0.0000e+00, 1.4013e-45, 0.0000e+00]])
[[4.52465793e-316 1.13635099e-322 2.12199579e-314]
 [8.81941692e+247 7.36093992e+223 1.81844573e-306]]
tensor([[[1.1531e-35, 0.0000e+00, 8.4078e-45],
         [0.0000e+00, 1.4013e-45, 0.0000e+00]],

        [[0.0000e+00, 0.0000e+00, 1.4013e-45],
         [0.0000e+00, 0.0000e+00, 0.0000e+00]]])


In [None]:
# torch.rand / np.random.rand draw uniform samples from the half-open interval [0, 1)
n_rows, n_cols = 5, 3
x = torch.rand(n_rows, n_cols)
y = np.random.rand(n_rows, n_cols)
print(x)
print(y)

tensor([[0.4411, 0.8574, 0.7882],
        [0.3418, 0.7050, 0.8734],
        [0.6362, 0.6112, 0.3384],
        [0.0651, 0.9969, 0.2283],
        [0.4568, 0.5402, 0.8449]])
[[0.44578847 0.55372383 0.76882134]
 [0.37838128 0.43534708 0.25734019]
 [0.007463   0.61861402 0.75599084]
 [0.88981829 0.95707311 0.95371256]
 [0.25153734 0.88088372 0.59379655]]


In [None]:
# torch.zeros(size) fills the tensor with 0; torch.ones(size) fills it with 1
x = torch.zeros((5, 3))
print(x)

tensor([[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]])


In [None]:
# shape of the tensor (x.shape is the attribute form of x.size())
print(x.shape)
# data type of the elements
print(x.dtype)

torch.Size([5, 3])
torch.float32


In [None]:
# the dtype can be chosen explicitly; when omitted, float32 is used
x = torch.zeros((5, 3), dtype=torch.half)  # torch.half is torch.float16
print(x)

tensor([[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]], dtype=torch.float16)


In [None]:
# build a tensor directly from existing Python data
values = [5.5, 3]
x = torch.tensor(values)
print(x.shape)

torch.Size([2])


In [None]:
# A torch tensor and a NumPy array convert into each other with a single call
a = torch.ones(5)
print(a)

# tensor -> array: Tensor.numpy()
b = a.numpy()
print(b, type(b), sep="\n")

tensor([1., 1., 1., 1., 1.])
[1. 1. 1. 1. 1.]
<class 'numpy.ndarray'>


In [None]:
# Careful: when the tensor lives on the CPU (not the GPU), the tensor and the
# array obtained from .numpy() share the same memory location, so an in-place
# change to one of them is visible through the other as well
a.add_(1)  # trailing underscore = in-place operation
print(a, b, sep="\n")

tensor([2., 2., 2., 2., 2.])
[2. 2. 2. 2. 2.]


In [None]:
# array -> tensor: torch.from_numpy(ndarray)
# (numpy is already imported as np in the first cell, so it is not re-imported here)
a = np.ones(5)
b = torch.from_numpy(a)
print(a)
print(b)

# again be careful when modifying: the tensor shares memory with the array,
# so the in-place update of `a` below also shows up in `b`
a += 1
print(a)
print(b)

[1. 1. 1. 1. 1.]
tensor([1., 1., 1., 1., 1.], dtype=torch.float64)
[2. 2. 2. 2. 2.]
tensor([2., 2., 2., 2., 2.], dtype=torch.float64)


In [None]:
# by default all tensors are created on the CPU,
# but they can be moved to the GPU when one is available
if torch.cuda.is_available():
    device = torch.device("cuda")          # a CUDA device object
    y = torch.ones_like(x, device=device)  # create a tensor directly on the GPU
    x = x.to(device)                       # or just pass a string: ``.to("cuda")``
    z = x + y
    # z = z.numpy() # not possible here because numpy cannot handle GPU tensors
    # move to CPU again; ``.to`` returns the converted tensor instead of
    # modifying ``z`` in place, so the result has to be assigned back
    # (``.to`` can also change the dtype in the same call)
    z = z.to("cpu")
    # z = z.numpy()  # possible now that z is back on the CPU

In [None]:
# element-wise addition, written three ways
x = torch.rand(2, 2)
y = torch.rand(2, 2)
print(x)
print(y)

# operator form: creates a new tensor
z = x + y
print(z)

# function form: same result as x + y
z = torch.add(x, y)
print(z)

# in-place form: methods with a trailing underscore modify the tensor itself.
# y.add(x) without the underscore returns a new tensor and leaves y unchanged,
# so its result would simply be thrown away here.
y.add_(x)
print(y)

tensor([[0.8683, 0.0600],
        [0.1364, 0.5356]])
tensor([[0.7748, 0.0649],
        [0.0731, 0.5220]])
tensor([[1.6430, 0.1250],
        [0.2095, 1.0576]])
tensor([[1.6430, 0.1250],
        [0.2095, 1.0576]])
tensor([[0.7748, 0.0649],
        [0.0731, 0.5220]])


In [None]:
# slicing: print the whole tensor, then only its second row (index 1)
x = torch.rand(5, 3)
print(x)
print(x[1])  # same row as x[1, :]

tensor([[0.0304, 0.7748, 0.8352],
        [0.3022, 0.4790, 0.1672],
        [0.0235, 0.8304, 0.1735],
        [0.6525, 0.7849, 0.4395],
        [0.0192, 0.4524, 0.1174]])
tensor([0.3022, 0.4790, 0.1672])


In [None]:
# Reshape with Tensor.view()
x = torch.randn(4, 4)
y = x.view(x.numel())  # all 16 elements in a single dimension
z = x.view(-1, 2)      # -1 tells PyTorch to infer that size from the other dimensions
print(x.shape, y.shape, z.shape)

torch.Size([4, 4]) torch.Size([16]) torch.Size([8, 2])


In [None]:
## autograd and gradients
# The autograd package differentiates tensor operations automatically.
# requires_grad=True -> every operation on the tensor is tracked.
x = torch.randn(3, requires_grad=True)
y = x + 2

# x was created by the user, so its grad_fn is None.
# y is the result of an operation, so it carries a grad_fn that
# references the Function which created it.
print(x, y, y.grad_fn, sep="\n")

tensor([-1.5510, -1.2571, -0.6856], requires_grad=True)
tensor([0.4490, 0.7429, 1.3144], grad_fn=<AddBackward0>)
<AddBackward0 object at 0x7f6b140da908>


In [None]:
# element-wise 3 * y^2 ...
squared = y * y
z = squared * 3
print(z)
# ... then averaged into a single scalar
z = torch.mean(z)
print(z)

tensor([0.6049, 1.6559, 5.1826], grad_fn=<MulBackward0>)
tensor(2.4811, grad_fn=<MeanBackward0>)


In [None]:
# backpropagate from z: computes dz/dx and stores it in x.grad
z.backward()  # dz/dx
print(x.grad)

tensor([0.8981, 1.4859, 2.6287])


In [None]:
# backward() adds the new gradient onto whatever is already stored in .grad.
# !!! This matters during optimization !!!
# Reset the gradients with .zero_() before the next optimization step.
weights = torch.ones(4, requires_grad=True)

for epoch in range(3):
    # dummy "model": a scalar that depends on the weights
    model_output = torch.sum(weights * 3)
    model_output.backward()

    print(weights.grad)

    # optimize model, i.e. take one gradient-descent step on the weights;
    # no_grad() keeps the update itself from being tracked
    with torch.no_grad():
        weights.sub_(0.1 * weights.grad)

    # this is important! Without the reset the gradients of all epochs
    # pile up, which affects the final weights & output
    weights.grad.zero_()

print(weights, model_output, sep="\n")

tensor([3., 3., 3., 3.])
tensor([3., 3., 3., 3.])
tensor([3., 3., 3., 3.])
tensor([0.1000, 0.1000, 0.1000, 0.1000], requires_grad=True)
tensor(4.8000, grad_fn=<SumBackward0>)


In [None]:
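# Optimizers provide a zero_grad() method that resets the gradients
# of all parameters they manage:
# optimizer = torch.optim.SGD([weights], lr=0.1)
# During training, after each backward pass:
# optimizer.step()       # update the parameters
# optimizer.zero_grad()  # clear the gradients before the next iteration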

In [None]:
# Backpropagation

x = torch.tensor(1.0)
y = torch.tensor(2.0)

# w is the parameter we want to optimize, so it needs requires_grad=True
w = torch.tensor(1.0, requires_grad=True)

# forward pass: prediction first, then the squared error as loss
y_predicted = w * x
residual = y_predicted - y
loss = residual ** 2
print(loss)

tensor(1., grad_fn=<PowBackward0>)


In [None]:
# backward pass to compute gradient dLoss/dw
# for loss = (w*x - y)**2 this is 2*x*(w*x - y)
loss.backward()
print(w.grad)

tensor(-2.)


In [None]:
# Linear regression with every backpropagation step computed manually
#
# model:  f = w * x
# target: f = 2 * x
X = np.asarray([1.0, 2.0, 3.0, 4.0], dtype=np.float32)
Y = np.asarray([2.0, 4.0, 6.0, 8.0], dtype=np.float32)

# the single weight to learn, starting at zero
w = 0.0

# model output
def forward(x):
    """Return the model prediction ``w * x`` using the module-level weight ``w``."""
    prediction = w * x
    return prediction

# loss = MSE
def loss(y, y_pred):
    """Return the mean squared error between targets ``y`` and predictions ``y_pred``."""
    squared_error = (y_pred - y) ** 2
    return squared_error.mean()

# J = MSE = 1/N * sum((w*x - y)**2)
# dJ/dw = 1/N * sum(2*x * (w*x - y))
def gradient(x, y, y_pred):
    """Return dJ/dw of the MSE loss for the model ``y_pred = w * x``.

    x:      inputs
    y:      targets
    y_pred: current predictions for ``x``
    All three are NumPy arrays of the same length.
    """
    # Average the per-sample terms. np.dot(2*x, y_pred - y) would already
    # collapse them into their sum, and .mean() of that single number is the
    # number itself, so the 1/N factor of the formula would be lost.
    return (2 * x * (y_pred - y)).mean()



In [None]:
# Start every run of this cell from the same initial weight. The loop below
# updates the module-level w, so without this reset a re-run of the cell would
# continue from the already-trained value instead of training from scratch.
w = 0.0

print(f'Prediction before training: f(6) = {forward(6):.3f}')

# Training
learning_rate = 0.01
n_iters = 15

for epoch in range(n_iters):
    # predict = forward pass
    y_pred = forward(X)

    # loss
    l = loss(Y, y_pred)

    # calculate gradients
    dw = gradient(X, Y, y_pred)

    # update weights: one gradient-descent step
    w -= learning_rate * dw

    # report every second epoch
    if epoch % 2 == 0:
        print(f'epoch {epoch+1}: w = {w:.3f}, loss = {l:.8f}')

print(f'Prediction after training: f(6) = {forward(6):.3f}')

Prediction before training: f(6) = 11.999
epoch 1: w = 2.000, loss = 0.00000033
epoch 3: w = 2.000, loss = 0.00000001
epoch 5: w = 2.000, loss = 0.00000000
epoch 7: w = 2.000, loss = 0.00000000
epoch 9: w = 2.000, loss = 0.00000000
epoch 11: w = 2.000, loss = 0.00000000
epoch 13: w = 2.000, loss = 0.00000000
epoch 15: w = 2.000, loss = 0.00000000
Prediction after training: f(6) = 12.000


In [None]:
# 1) Design model (input, output, forward pass with different layers)
# 2) Construct loss and optimizer
# 3) Training loop
#       - Forward = compute prediction and loss
#       - Backward = compute gradients
#       - Update weights


# Linear regression
# f = w * x

# here : f = 2 * x

# 0) Training samples: one row per sample, one column per feature
X = torch.tensor([[1], [2], [3], [4]], dtype=torch.float32)
Y = torch.tensor([[2], [4], [6], [8]], dtype=torch.float32)

n_samples, n_features = X.shape
print(f'#samples: {n_samples}, #features: {n_features}')
# 0) create a test sample
X_test = torch.tensor([5], dtype=torch.float32)

# 1) Design Model, the model has to implement the forward pass!
# Here we can use a built-in model from PyTorch
input_size = n_features
# the output width follows the target tensor, not the number of input features
output_size = Y.shape[1]

# we can call this model with samples X
model = nn.Linear(input_size, output_size)

'''
class LinearRegression(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(LinearRegression, self).__init__()
        # define diferent layers
        self.lin = nn.Linear(input_dim, output_dim)
    def forward(self, x):
        return self.lin(x)
model = LinearRegression(input_size, output_size)
'''

print(f'Prediction before training: f(5) = {model(X_test).item():.3f}')

# 2) Define loss and optimizer
learning_rate = 0.01
n_iters = 100

# note: this rebinds the name `loss`, replacing the NumPy helper function
# of the same name defined in an earlier cell
loss = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

# 3) Training loop
for epoch in range(n_iters):
    # predict = forward pass with our model
    y_predicted = model(X)

    # loss: nn.MSELoss expects (input, target), i.e. prediction first
    l = loss(y_predicted, Y)

    # calculate gradients = backward pass
    l.backward()

    # update weights
    optimizer.step()

    # zero the gradients after updating, so they do not accumulate
    # into the next iteration
    optimizer.zero_grad()

    if epoch % 10 == 0:
        [w, b] = model.parameters() # unpack parameters
        # log the scalar loss value rather than the tensor repr with its grad_fn
        print('epoch ', epoch+1, ': w = ', w[0][0].item(), ' loss = ', l.item())

print(f'Prediction after training: f(5) = {model(X_test).item():.3f}')

#samples: 4, #features: 1
Prediction before training: f(5) = 5.192
epoch  1 : w =  0.9910392165184021  loss =  tensor(5.4881, grad_fn=<MeanBackward0>)
epoch  11 : w =  1.5283622741699219  loss =  tensor(0.3362, grad_fn=<MeanBackward0>)
epoch  21 : w =  1.6239278316497803  loss =  tensor(0.1916, grad_fn=<MeanBackward0>)
epoch  31 : w =  1.648167371749878  loss =  tensor(0.1772, grad_fn=<MeanBackward0>)
epoch  41 : w =  1.6606721878051758  loss =  tensor(0.1668, grad_fn=<MeanBackward0>)
epoch  51 : w =  1.6710351705551147  loss =  tensor(0.1571, grad_fn=<MeanBackward0>)
epoch  61 : w =  1.680807113647461  loss =  tensor(0.1480, grad_fn=<MeanBackward0>)
epoch  71 : w =  1.6902445554733276  loss =  tensor(0.1394, grad_fn=<MeanBackward0>)
epoch  81 : w =  1.699395775794983  loss =  tensor(0.1312, grad_fn=<MeanBackward0>)
epoch  91 : w =  1.708275556564331  loss =  tensor(0.1236, grad_fn=<MeanBackward0>)
Prediction after training: f(5) = 9.415
