# Tensors

Create an Integer Tensor

In [1]:
import torch

t1 = torch.tensor(1.)
t1

tensor(1.)

In [2]:
t1.dtype

torch.float32

Create a Vector Tensor,

In [3]:
t2 = torch.tensor([1,2.,3,4])
t2

tensor([1., 2., 3., 4.])

Create a Matrix Tensor

In [4]:
t3 = torch.tensor([
    [1.,2,3],
    [4,5,6]
])
t3

tensor([[1., 2., 3.],
        [4., 5., 6.]])

Create a 3D Array

In [5]:
t4 = torch.tensor([
    [
        [1,2,3.],
        [4,5,6]
    ],
    [
        [7,8,9],
        [10,11,12]
    ]
])
t4

tensor([[[ 1.,  2.,  3.],
         [ 4.,  5.,  6.]],

        [[ 7.,  8.,  9.],
         [10., 11., 12.]]])

Print the created tensor and each shape

In [6]:
print(t1)
t1.shape

tensor(1.)


torch.Size([])

In [7]:
print(t2)
t2.shape

tensor([1., 2., 3., 4.])


torch.Size([4])

In [8]:
print(t3)
t3.shape

tensor([[1., 2., 3.],
        [4., 5., 6.]])


torch.Size([2, 3])

In [9]:
print(t4)
t4.shape

tensor([[[ 1.,  2.,  3.],
         [ 4.,  5.,  6.]],

        [[ 7.,  8.,  9.],
         [10., 11., 12.]]])


torch.Size([2, 2, 3])

How about creating a tensor with different shape?

In [10]:
t5 = torch.tensor([
    [1,2.,3],
    [4,5],
    [6,7]
])

ValueError: expected sequence of length 3 at dim 1 (got 2)

### Tensor Operation and Gradients

In [11]:
x = torch.tensor(1.) # we do not need the gradient from the x (or usually called input)
w = torch.tensor(2., requires_grad=True)
b = torch.tensor(3., requires_grad=True)
x, w, b

(tensor(1.), tensor(2., requires_grad=True), tensor(3., requires_grad=True))

In [12]:
y = w * x + b
y

tensor(5., grad_fn=<AddBackward0>)

Compute the gradients of `y`

In [13]:
y.backward()
y

tensor(5., grad_fn=<AddBackward0>)

Now we display the gradients

In [14]:
print('dy/dx', x.grad)
print('dy/dw', w.grad)
print('dy/db', b.grad)

dy/dx None
dy/dw tensor(1.)
dy/db tensor(1.)


### Tensor Functions

Create a tensor with a fixed value for every element

In [15]:
# will create a matrix of 3 row and 2 columns with 42 as value
t6 = torch.full((2,3), 42)
t6

tensor([[42, 42, 42],
        [42, 42, 42]])

In [16]:
# concatenate two tensors
t7 = torch.cat((t3,t6))
t3, t6, t7

(tensor([[1., 2., 3.],
         [4., 5., 6.]]),
 tensor([[42, 42, 42],
         [42, 42, 42]]),
 tensor([[ 1.,  2.,  3.],
         [ 4.,  5.,  6.],
         [42., 42., 42.],
         [42., 42., 42.]]))

In [17]:
# get the sin of the tensor
t8 = torch.sin(t7)
t8

tensor([[ 0.8415,  0.9093,  0.1411],
        [-0.7568, -0.9589, -0.2794],
        [-0.9165, -0.9165, -0.9165],
        [-0.9165, -0.9165, -0.9165]])

In [18]:
# reshape from 4 x 3 matrix to 3 dimensions 2 rows and 2 columns
t9 = t8.reshape(3,2,2)
t9

tensor([[[ 0.8415,  0.9093],
         [ 0.1411, -0.7568]],

        [[-0.9589, -0.2794],
         [-0.9165, -0.9165]],

        [[-0.9165, -0.9165],
         [-0.9165, -0.9165]]])

# Gradient Descent and Linear Regression

Create training data for temperature, rainfall, humidity. Our task is to predict the yield of apple and orange

In [19]:
import numpy as np

inputs = np.array([
    [73,67,43],
    [91,88,64],
    [87,134,58],
    [102,43,37],
    [69,96,70]
], dtype=np.float32)
inputs

array([[ 73.,  67.,  43.],
       [ 91.,  88.,  64.],
       [ 87., 134.,  58.],
       [102.,  43.,  37.],
       [ 69.,  96.,  70.]], dtype=float32)

In [20]:
# first column is for apple yield, second is for orange yield
targets = np.array([
    [56,70],
    [81,101],
    [119,133],
    [22,37],
    [103,119]
], dtype=np.float32)
targets

array([[ 56.,  70.],
       [ 81., 101.],
       [119., 133.],
       [ 22.,  37.],
       [103., 119.]], dtype=float32)

Convert to tensor

In [21]:
inputs = torch.from_numpy(inputs)
targets = torch.from_numpy(targets)
inputs, targets

(tensor([[ 73.,  67.,  43.],
         [ 91.,  88.,  64.],
         [ 87., 134.,  58.],
         [102.,  43.,  37.],
         [ 69.,  96.,  70.]]),
 tensor([[ 56.,  70.],
         [ 81., 101.],
         [119., 133.],
         [ 22.,  37.],
         [103., 119.]]))

### Create Linear Regression

In [22]:
# 2 row for apple and orange, 3 columns for features
w = torch.randn(2, 3, requires_grad=True)
b = torch.randn(2, requires_grad=True)
w, b

(tensor([[ 0.5305,  1.4927, -0.6954],
         [-0.8306,  1.4336,  1.3536]], requires_grad=True),
 tensor([0.3500, 1.7211], requires_grad=True))

In [23]:
def linear_regression(x):
    # @ is for matrix multiplication or we could use torch.matmul(x, w.t())
    return x @ w.t() + b

yhat = linear_regression(inputs)
yhat

tensor([[109.1883,  95.3425],
        [135.4812, 138.9227],
        [206.1977, 200.0692],
        [ 92.9203,  28.7269],
        [131.5791, 176.7864]], grad_fn=<AddBackward0>)

In [24]:
targets

tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.]])

### Loss Function



In [25]:
def mean_squared_error(y_pred, y_true):
    diff = y_pred - y_true
    return torch.sum(diff ** 2) / diff.numel()

loss = mean_squared_error(yhat, targets)
loss

tensor(2923.3464, grad_fn=<DivBackward0>)

### Compute Gradients

In [26]:
loss.backward()

In [27]:
w, w.grad

(tensor([[ 0.5305,  1.4927, -0.6954],
         [-0.8306,  1.4336,  1.3536]], requires_grad=True),
 tensor([[5126.5137, 5167.1255, 3091.1904],
         [2855.8801, 3842.8354, 2229.1484]]))

In [28]:
b, b.grad

(tensor([0.3500, 1.7211], requires_grad=True), tensor([58.8733, 35.9696]))

### Adjust Weights and Biases to Reduce the Loss

In [29]:
# the use of torch.no_grad() is to indicate PyTorch that we shouldnt track, calculate, or modify gradients while updating the weights and biases
with torch.no_grad():
    w -= w.grad * 1e-5 # learning rate
    b -= b.grad * 1e-5

In [30]:
yhat = linear_regression(inputs)
yhat

tensor([[100.6542,  89.7241],
        [124.2901, 131.5151],
        [193.0202, 191.1420],
        [ 84.3250,  23.3363],
        [120.9170, 169.5660]], grad_fn=<AddBackward0>)

In [31]:
loss = mean_squared_error(yhat, targets)
loss

tensor(2099.6763, grad_fn=<DivBackward0>)

Before proceeding, reset gradients to zero. Because PyTorch accumulates gradient, otherwise when using the `backward()` function again, the new gradient values are added to existing gradients

In [32]:
w.grad.zero_()
b.grad.zero_()
w.grad, b.grad

(tensor([[0., 0., 0.],
         [0., 0., 0.]]),
 tensor([0., 0.]))

### Train the Model with Gradient Descent

In [33]:
yhat = linear_regression(inputs)
yhat

tensor([[100.6542,  89.7241],
        [124.2901, 131.5151],
        [193.0202, 191.1420],
        [ 84.3250,  23.3363],
        [120.9170, 169.5660]], grad_fn=<AddBackward0>)

In [34]:
loss = mean_squared_error(yhat, targets)
loss

tensor(2099.6763, grad_fn=<DivBackward0>)

In [35]:
loss.backward()
w.grad, b.grad

(tensor([[4246.4668, 4224.0142, 2508.8164],
         [2274.0898, 3212.9336, 1841.4807]]),
 tensor([48.4413, 29.0567]))

In [36]:
with torch.no_grad():
    w -= w.grad * 1e-5
    b -= b.grad * 1e-5
    w.grad.zero_()
    b.grad.zero_()

In [37]:
yhat = linear_regression(inputs)
loss = mean_squared_error(yhat, targets)
loss

tensor(1543.4600, grad_fn=<DivBackward0>)

In [39]:
epochs = 100
for i in range(epochs):
    yhat = linear_regression(inputs)
    loss = mean_squared_error(yhat, targets)
    loss.backward()
    with torch.no_grad():
        w -= w.grad * 1e-5
        b -= b.grad * 1e-5
        w.grad.zero_()
        b.grad.zero_()
    print(f'Epoch {i+1}/{epochs} - Loss: {loss}')

Epoch 0/100 - Loss: 1543.4599609375
Epoch 1/100 - Loss: 864.79150390625
Epoch 2/100 - Loss: 707.8923950195312
Epoch 3/100 - Loss: 601.0569458007812
Epoch 4/100 - Loss: 527.9724731445312
Epoch 5/100 - Loss: 477.6439514160156
Epoch 6/100 - Loss: 442.6639099121094
Epoch 7/100 - Loss: 418.0394592285156
Epoch 8/100 - Loss: 400.4058837890625
Epoch 9/100 - Loss: 387.4954833984375
Epoch 10/100 - Loss: 377.7800598144531
Epoch 11/100 - Loss: 370.2298278808594
Epoch 12/100 - Loss: 364.1502380371094
Epoch 13/100 - Loss: 359.0736389160156
Epoch 14/100 - Loss: 354.6842346191406
Epoch 15/100 - Loss: 350.7693176269531
Epoch 16/100 - Loss: 347.1853942871094
Epoch 17/100 - Loss: 343.8355407714844
Epoch 18/100 - Loss: 340.6543884277344
Epoch 19/100 - Loss: 337.59771728515625
Epoch 20/100 - Loss: 334.63568115234375
Epoch 21/100 - Loss: 331.7477111816406
Epoch 22/100 - Loss: 328.9205017089844
Epoch 23/100 - Loss: 326.1439514160156
Epoch 24/100 - Loss: 323.4122619628906
Epoch 25/100 - Loss: 320.720397949218

In [40]:
yhat = linear_regression(inputs)
loss = mean_squared_error(yhat, targets)
loss

tensor(186.8787, grad_fn=<DivBackward0>)

In [41]:
targets, yhat

(tensor([[ 56.,  70.],
         [ 81., 101.],
         [119., 133.],
         [ 22.,  37.],
         [103., 119.]]),
 tensor([[ 60.0579,  66.9381],
         [ 72.9384,  99.6642],
         [135.1233, 140.6711],
         [ 36.4272,  15.8782],
         [ 76.9092, 130.0619]], grad_fn=<AddBackward0>))

# Using PyTorch

In [42]:
from torch.utils.data import TensorDataset

train_ds = TensorDataset(inputs, targets)
train_ds

<torch.utils.data.dataset.TensorDataset at 0x1096a9d10>

In [43]:
from torch.utils.data import DataLoader

train_dl = DataLoader(train_ds, shuffle=True)
train_dl

<torch.utils.data.dataloader.DataLoader at 0x1167519d0>

In [44]:
for x, y in train_dl:
    print(x, y)
    break

tensor([[102.,  43.,  37.]]) tensor([[22., 37.]])


In [45]:
from torch import nn

model = nn.Linear(3,2)
print(model.weight)
print(model.bias)

Parameter containing:
tensor([[-0.3683, -0.3759, -0.3779],
        [-0.5106, -0.2519,  0.0807]], requires_grad=True)
Parameter containing:
tensor([ 0.5313, -0.3205], requires_grad=True)


In [46]:
list(model.parameters())

[Parameter containing:
 tensor([[-0.3683, -0.3759, -0.3779],
         [-0.5106, -0.2519,  0.0807]], requires_grad=True),
 Parameter containing:
 tensor([ 0.5313, -0.3205], requires_grad=True)]

In [47]:
yhat = model(inputs)
yhat

tensor([[ -67.7912,  -50.9966],
        [ -90.2509,  -63.7809],
        [-103.8019,  -73.8094],
        [ -67.1828,  -60.2426],
        [ -87.4230,  -54.0788]], grad_fn=<AddmmBackward0>)

### Loss Function

In [48]:
from torch.nn import functional as F

loss_fn = F.mse_loss

In [49]:
loss = loss_fn(yhat, targets)
loss

tensor(26248.1758, grad_fn=<MseLossBackward0>)

### Optimizer

In [50]:
from torch.optim import SGD

opt = SGD(model.parameters(), lr=1e-5)
opt

SGD (
Parameter Group 0
    dampening: 0
    differentiable: False
    foreach: None
    fused: None
    lr: 1e-05
    maximize: False
    momentum: 0
    nesterov: False
    weight_decay: 0
)

In [53]:
def fit(epochs, model, loss_fn, opt, train_dl):
    for epoch in range(epochs):
        for x, y in train_dl:
            # generate prediction
            yhat = model(x)

            # calculate loss
            loss = loss_fn(yhat, y)

            # compute backprop
            loss.backward()

            # update parameter
            opt.step()

            # reset gradient to zero
            opt.zero_grad()
        if (epoch+1) % 10 == 0:
            print(f'Epoch {epoch+1}/{epochs} - Loss: {loss.item()}')

In [54]:
epochs = 100
fit(epochs, model, loss_fn, opt, train_dl)

Epoch 10/100 - Loss: 309.8774108886719
Epoch 20/100 - Loss: 20.270511627197266
Epoch 30/100 - Loss: 1.775602102279663
Epoch 40/100 - Loss: 105.69273376464844
Epoch 50/100 - Loss: 61.20269012451172
Epoch 60/100 - Loss: 7.004505157470703
Epoch 70/100 - Loss: 3.079195976257324
Epoch 80/100 - Loss: 3.332244396209717
Epoch 90/100 - Loss: 2.338931083679199
Epoch 100/100 - Loss: 31.582801818847656


In [55]:
yhat = model(inputs)
yhat

tensor([[ 58.1421,  70.8210],
        [ 80.5763,  99.7070],
        [123.1555, 135.7992],
        [ 24.1640,  38.9944],
        [ 97.2224, 116.1385]], grad_fn=<AddmmBackward0>)

In [56]:
targets

tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.]])

In [57]:
loss = loss_fn(yhat, targets)
loss

tensor(8.2445, grad_fn=<MseLossBackward0>)