In [1]:
import torch

In [2]:
torch.cuda.is_available()  # check if GPU is available

  return torch._C._cuda_getDeviceCount() > 0


False

# Automatic differentiation using Autograd

In [3]:
x = torch.ones(2, 2, requires_grad=True)
print(x)

tensor([[1., 1.],
        [1., 1.]], requires_grad=True)


In [4]:
y = x + 2
print(y)

tensor([[3., 3.],
        [3., 3.]], grad_fn=<AddBackward0>)


In [5]:
z = y * y * 3
out = z.mean()
print(z, out)

tensor([[27., 27.],
        [27., 27.]], grad_fn=<MulBackward0>) tensor(27., grad_fn=<MeanBackward0>)


In [6]:
a = torch.randn(2, 2)
a = ((a * 3) / (a - 1))
print(a.requires_grad)

False


In [7]:
a.requires_grad_(True)
print(a.requires_grad)
b = (a * a).sum()
print(b.grad_fn)

True
<SumBackward0 object at 0x7f04643d4dc0>


In [8]:
out.backward()
print(x.grad)  # d(out) / dx

tensor([[4.5000, 4.5000],
        [4.5000, 4.5000]])


In [9]:
x = torch.randn(3, requires_grad=True)
y = x * 2
while y.data.norm() < 1000:
    y = y * 2
print(x)
print(y)

tensor([-0.1443, -2.1702,  0.5571], requires_grad=True)
tensor([  -73.8692, -1111.1251,   285.2525], grad_fn=<MulBackward0>)


# Linear regression manually

In [10]:
import numpy as np

Try to learn the function $f(x) = 2x$ 

In [11]:
X = np.array([1, 2, 3, 4], dtype=np.float32)
Y = np.array([2, 4, 6, 8], dtype=np.float32)
w = 0.0

In [12]:
# model prediction
def forward(x):
    return w * x

# MSE loss
def loss(y, y_pred):
    return ((y - y_pred) ** 2).mean()

# gradient
def gradient(x, y, y_pred):
    return (np.dot(2*x, y_pred - y)).mean()

In [13]:
print(f'Prediction before training: f(5) = {forward(5):.3f}')

Prediction before training: f(5) = 0.000


In [14]:
# training
learning_rate = 0.01
n_iters = 20
for epoch in range(n_iters):
    y_pred = forward(X)
    l = loss(Y, y_pred)
    dw = gradient(X, Y, y_pred)
    w -= learning_rate * dw
    if epoch % 2 == 0:
        print(f'Epoch {epoch+1}: weights = {w:.3f}, loss = {l:.8f}')
              
print(f'Prediction after training: f(5) = {forward(5):.3f}')

Epoch 1: weights = 1.200, loss = 30.00000000
Epoch 3: weights = 1.872, loss = 0.76800019
Epoch 5: weights = 1.980, loss = 0.01966083
Epoch 7: weights = 1.997, loss = 0.00050332
Epoch 9: weights = 1.999, loss = 0.00001288
Epoch 11: weights = 2.000, loss = 0.00000033
Epoch 13: weights = 2.000, loss = 0.00000001
Epoch 15: weights = 2.000, loss = 0.00000000
Epoch 17: weights = 2.000, loss = 0.00000000
Epoch 19: weights = 2.000, loss = 0.00000000
Prediction after training: f(5) = 10.000


# Linear regression using pytorch (only gradient computation)

In [15]:
import torch

In [16]:
X = torch.tensor([1, 2, 3, 4], dtype=torch.float32)
Y = torch.tensor([2, 4, 6, 8], dtype=torch.float32)
w = torch.tensor(0.0, dtype=torch.float32, requires_grad=True)

In [17]:
# training
learning_rate = 0.01
n_iters = 50
for epoch in range(n_iters):
    y_pred = forward(X)
    l = loss(Y, y_pred)
    l.backward()  # compute dl/dw
    
    # update gradients
    with torch.no_grad():
        w -= learning_rate * w.grad
        
    # zero gradients in-place (very imp!)
    # otherwise gradients will accumulate over iterations
    w.grad.zero_()
        
        
    if epoch % 5 == 0:
        print(f'Epoch {epoch+1}: weights = {w:.3f}, loss = {l:.8f}')
              
print(f'Prediction after training: f(5) = {forward(5):.3f}')

Epoch 1: weights = 0.300, loss = 30.00000000
Epoch 6: weights = 1.246, loss = 5.90623236
Epoch 11: weights = 1.665, loss = 1.16278565
Epoch 16: weights = 1.851, loss = 0.22892261
Epoch 21: weights = 1.934, loss = 0.04506890
Epoch 26: weights = 1.971, loss = 0.00887291
Epoch 31: weights = 1.987, loss = 0.00174685
Epoch 36: weights = 1.994, loss = 0.00034392
Epoch 41: weights = 1.997, loss = 0.00006770
Epoch 46: weights = 1.999, loss = 0.00001333
Prediction after training: f(5) = 9.997


**Observation**: The autograd function is not as exact as the numerically computed gradient, so it needs more iterations.

# Linear regression using pytorch

In [46]:
import torch
import torch.nn as nn

In [47]:
X = torch.tensor([1, 2, 3, 4], dtype=torch.float32)
Y = torch.tensor([2, 4, 6, 8], dtype=torch.float32)

In [48]:
# pytorch expects data in format n_samples, n_features
# therefore we need to reshape the data
X = X.view(X.shape[0], 1)
Y = Y.view(X.shape[0], 1)
print(X)
print(Y)

tensor([[1.],
        [2.],
        [3.],
        [4.]])
tensor([[2.],
        [4.],
        [6.],
        [8.]])


In [49]:
# define the model
class LinearRegression(nn.Module):
    def __init__(self, input_size, output_size):
        super(LinearRegression, self).__init__()
        self.linear = nn.Linear(input_size, output_size)
    
    def forward(self, x):
        return self.linear(x)

In [50]:
n_samples, n_features = X.shape
input_size = n_features
output_size = 1

In [51]:
model = LinearRegression(input_size, output_size)

In [52]:
learning_rate = 0.01

# define loss
criterion = nn.MSELoss()

# define optimizer
# model.parameters() are the weights
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate) 

In [53]:
# training loop
n_iters = 100
for epoch in range(n_iters):
    y_pred = model(X)  # forward pass
    l = criterion(y_pred, Y)  # compute loss
    l.backward()  # backcward pass
    
    optimizer.step() # update weights
    optimizer.zero_grad()
        
    w, b = model.parameters()
    if epoch % 5 == 0:
        print(f'Epoch {epoch+1}: weights = {w[0][0]:.3f}, loss = {l:.8f}')

X_test = torch.tensor([5], dtype=torch.float32)  # test point must also be a tensor
print(f'Prediction after training: f(5) = {model(X_test).item():.3f}')

Epoch 1: weights = -0.001, loss = 46.26792526
Epoch 6: weights = 1.112, loss = 7.46706581
Epoch 11: weights = 1.560, loss = 1.22525179
Epoch 16: weights = 1.741, loss = 0.22054875
Epoch 21: weights = 1.815, loss = 0.05825001
Epoch 26: weights = 1.846, loss = 0.03147167
Epoch 31: weights = 1.859, loss = 0.02651152
Epoch 36: weights = 1.866, loss = 0.02508005
Epoch 41: weights = 1.870, loss = 0.02423492
Epoch 46: weights = 1.872, loss = 0.02350228
Epoch 51: weights = 1.874, loss = 0.02280535
Epoch 56: weights = 1.876, loss = 0.02213126
Epoch 61: weights = 1.878, loss = 0.02147749
Epoch 66: weights = 1.880, loss = 0.02084307
Epoch 71: weights = 1.882, loss = 0.02022736
Epoch 76: weights = 1.884, loss = 0.01962989
Epoch 81: weights = 1.885, loss = 0.01905004
Epoch 86: weights = 1.887, loss = 0.01848734
Epoch 91: weights = 1.889, loss = 0.01794124
Epoch 96: weights = 1.891, loss = 0.01741129
Prediction after training: f(5) = 9.777


In [55]:
model(X_test).detach()

tensor([9.7772])

In [60]:
for parameter in model.parameters():
    print(parameter)

Parameter containing:
tensor([[1.8918]], requires_grad=True)
Parameter containing:
tensor([0.3181], requires_grad=True)


In [61]:
print(model)

LinearRegression(
  (linear): Linear(in_features=1, out_features=1, bias=True)
)
