In [1]:
pip install torch==2.5.0 torchvision==0.20.0 torchaudio==2.5.0 --index-url https://download.pytorch.org/whl/cpu

Defaulting to user installation because normal site-packages is not writeable
Looking in indexes: https://download.pytorch.org/whl/cpu
Note: you may need to restart the kernel to use updated packages.


In [2]:
import torch

In [3]:
# Number
t1 = torch.tensor(4.)
t1

tensor(4.)

In [4]:
t1.dtype

torch.float32

In [5]:
# Vector
t2 = torch.tensor([1., 2, 3, 4])
t2

tensor([1., 2., 3., 4.])

In [6]:
# Matrix
t3 = torch.tensor([[5., 6],
                   [7, 8],
                   [9, 10]])
t3

tensor([[ 5.,  6.],
        [ 7.,  8.],
        [ 9., 10.]])

In [7]:
# 3-dimensional array
t4 = torch.tensor([
    [[11, 12, 13],
     [13, 14, 15]],
    [[15, 16, 17],
     [17, 18, 19.]]])
t4

tensor([[[11., 12., 13.],
         [13., 14., 15.]],

        [[15., 16., 17.],
         [17., 18., 19.]]])

In [9]:
# Create tensors.
x = torch.tensor(3.)
w = torch.tensor(4., requires_grad=True)
b = torch.tensor(5., requires_grad=True)
x, w, b

(tensor(3.), tensor(4., requires_grad=True), tensor(5., requires_grad=True))

In [10]:
# Arithmetic operations
y = w * x + b
y

tensor(17., grad_fn=<AddBackward0>)

In [11]:
# Arithmetic operations
y = w * x + b
y

tensor(17., grad_fn=<AddBackward0>)

In [12]:
# Compute derivatives
y.backward()

In [13]:
# Display gradients
print('dy/dx:', x.grad)
print('dy/dw:', w.grad)
print('dy/db:', b.grad)

dy/dx: None
dy/dw: tensor(3.)
dy/db: tensor(1.)


## Tensor functions

Apart from arithmetic operations, the `torch` module also contains many functions for creating and manipulating tensors. Let's look at some examples.

In [14]:
# Create a tensor with a fixed value for every element
t6 = torch.full((3, 2), 42)
t6

tensor([[42, 42],
        [42, 42],
        [42, 42]])

In [15]:
# Concatenate two tensors with compatible shapes
t7 = torch.cat((t3, t6))
t7

tensor([[ 5.,  6.],
        [ 7.,  8.],
        [ 9., 10.],
        [42., 42.],
        [42., 42.],
        [42., 42.]])

## Interoperability with Numpy

[Numpy](http://www.numpy.org/) is a popular open-source library used for mathematical and scientific computing in Python. It enables efficient operations on large multi-dimensional arrays and has a vast ecosystem of supporting libraries, including:

* [Pandas](https://pandas.pydata.org/) for file I/O and data analysis
* [Matplotlib](https://matplotlib.org/) for plotting and visualization
* [OpenCV](https://opencv.org/) for image and video processing


Instead of reinventing the wheel, PyTorch interoperates well with Numpy to leverage its existing ecosystem of tools and libraries.

In [17]:
import numpy as np

x = np.array([[1, 2], [3, 4.]])
x

array([[1., 2.],
       [3., 4.]])

In [18]:
# Convert the numpy array to a torch tensor.
y = torch.from_numpy(x)
y

tensor([[1., 2.],
        [3., 4.]], dtype=torch.float64)

In [19]:
x.dtype, y.dtype

(dtype('float64'), torch.float64)

In [20]:
# Convert a torch tensor to a numpy array
z = y.numpy()
z

array([[1., 2.],
       [3., 4.]])

## Linear-regression from scrach using pytorch

In [21]:
import numpy as np
import torch

In [22]:
#making training data
# Input (temp, rainfall, humidity)
inputs = np.array([[73, 67, 43],
                   [91, 88, 64],
                   [87, 134, 58],
                   [102, 43, 37],
                   [69, 96, 70]], dtype='float32')

In [23]:
# Targets (apples, oranges)
target = np.array([[56, 70],
                    [81, 101],
                    [119, 133],
                    [22, 37],
                    [103, 119]], dtype='float32')

In [24]:
#Convert input and target to tensors
inputs = torch.from_numpy(inputs)
target = torch.from_numpy(target)

print(inputs,"\n")
print(target)

tensor([[ 73.,  67.,  43.],
        [ 91.,  88.,  64.],
        [ 87., 134.,  58.],
        [102.,  43.,  37.],
        [ 69.,  96.,  70.]]) 

tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.]])


In [25]:
# weights and biases
w = torch.randn(2,3 , requires_grad=True)
b = torch.randn(2, requires_grad=True)

print(w)
print(b)

tensor([[ 0.4749, -0.2452,  1.3185],
        [-0.8625,  0.4079, -0.9293]], requires_grad=True)
tensor([ 0.6216, -0.3526], requires_grad=True)


In [26]:
#define the model

def model(x):
  return x @ w.t() + b

In [27]:
# prediction
preds = model(inputs)
print(preds)

tensor([[  75.5578,  -75.9471],
        [ 106.6455, -102.4214],
        [  85.5568,  -74.6337],
        [  87.3037, -105.1736],
        [ 102.1468,  -85.7585]], grad_fn=<AddBackward0>)


In [28]:
#actual
print(target)

tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.]])


In [29]:
# loss function MSE
def MSE(actual, target):
  diff = actual - target
  return torch.sum(diff * diff) / diff.numel()

In [30]:
# error
loss = MSE(target, preds)
print(loss)

tensor(17435.5898, grad_fn=<DivBackward0>)


In [31]:
# compute gradients
loss.backward()

In [32]:
print(w, "\n")
print(w.grad)

tensor([[ 0.4749, -0.2452,  1.3185],
        [-0.8625,  0.4079, -0.9293]], requires_grad=True) 

tensor([[  1490.8020,    362.3886,    579.8210],
        [-15171.9336, -16254.5479, -10186.1934]])


In [33]:
print(b, "\n")
print(b.grad)

tensor([ 0.6216, -0.3526], requires_grad=True) 

tensor([  15.2421, -180.7869])


In [34]:
#reset grad
w.grad.zero_()
b.grad.zero_()

print(w.grad)
print(b.grad)

tensor([[0., 0., 0.],
        [0., 0., 0.]])
tensor([0., 0.])


In [35]:
# adjust params

preds = model(inputs)
print(preds)

tensor([[  75.5578,  -75.9471],
        [ 106.6455, -102.4214],
        [  85.5568,  -74.6337],
        [  87.3037, -105.1736],
        [ 102.1468,  -85.7585]], grad_fn=<AddBackward0>)


In [36]:
# loss
loss = MSE(target, preds)
print(loss)

tensor(17435.5898, grad_fn=<DivBackward0>)


In [37]:
loss.backward()

print(w.grad, "\n")
print(b.grad)

tensor([[  1490.8020,    362.3886,    579.8210],
        [-15171.9336, -16254.5479, -10186.1934]]) 

tensor([  15.2421, -180.7869])


In [38]:
  # adjust weight & reset grad
with torch.no_grad():
    w -= w.grad * 1e-5
    b -= b.grad * 1e-5
    w.grad.zero_()
    b.grad.zero_()

In [39]:
print(w)
print(b)

tensor([[ 0.4600, -0.2488,  1.3127],
        [-0.7108,  0.5704, -0.8274]], requires_grad=True)
tensor([ 0.6215, -0.3508], requires_grad=True)


In [40]:
# calculate again
preds = model(inputs)
loss = MSE(target, preds)
print(loss)

tensor(11964.2363, grad_fn=<DivBackward0>)


In [41]:
# Training for multiple epochs
for i in range(400):
  preds = model(inputs)
  loss = MSE(target, preds)
  loss.backward()

  with torch.no_grad():
     w -= w.grad * 1e-5 # learning rate
     b -= b.grad * 1e-5
     w.grad.zero_()
     b.grad.zero_()
  print(f"Epochs({i}/{100}) & Loss {loss}")

Epochs(0/100) & Loss 11964.236328125
Epochs(1/100) & Loss 8274.884765625
Epochs(2/100) & Loss 5786.42578125
Epochs(3/100) & Loss 4107.259765625
Epochs(4/100) & Loss 2973.490966796875
Epochs(5/100) & Loss 2207.2861328125
Epochs(6/100) & Loss 1688.802490234375
Epochs(7/100) & Loss 1337.2821044921875
Epochs(8/100) & Loss 1098.30078125
Epochs(9/100) & Loss 935.1842651367188
Epochs(10/100) & Loss 823.2176513671875
Epochs(11/100) & Loss 745.744384765625
Epochs(12/100) & Loss 691.5411376953125
Epochs(13/100) & Loss 653.0432739257812
Epochs(14/100) & Loss 625.1532592773438
Epochs(15/100) & Loss 604.4356689453125
Epochs(16/100) & Loss 588.5748291015625
Epochs(17/100) & Loss 576.0101928710938
Epochs(18/100) & Loss 565.689697265625
Epochs(19/100) & Loss 556.9041748046875
Epochs(20/100) & Loss 549.17529296875
Epochs(21/100) & Loss 542.1806030273438
Epochs(22/100) & Loss 535.7025146484375
Epochs(23/100) & Loss 529.59423828125
Epochs(24/100) & Loss 523.7559814453125
Epochs(25/100) & Loss 518.1210937

In [42]:
preds = model(inputs)
loss = MSE(target, preds)
print(loss)

tensor(61.2362, grad_fn=<DivBackward0>)


In [43]:
from math import sqrt
sqrt(loss)

7.825359046181853

In [44]:
preds

tensor([[ 57.6522,  71.0231],
        [ 86.4198,  94.0661],
        [108.3306, 146.8293],
        [ 23.7079,  40.7828],
        [107.8376, 105.3831]], grad_fn=<AddBackward0>)

In [45]:
target

tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.]])

## Neural Network using Pytorch

In [47]:
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor, Lambda, Compose
import matplotlib.pyplot as plt

In [48]:
# Download training data from open datasets.
training_data = datasets.FashionMNIST(
    root="data",
    train=True,
    download=True,
    transform=ToTensor(),
)

# Download test data from open datasets.
test_data = datasets.FashionMNIST(
    root="data",
    train=False,
    download=True,
    transform=ToTensor(),
)

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz to data\FashionMNIST\raw\train-images-idx3-ubyte.gz


100%|█████████████████████████████████████████████████████████████████████████████| 26.4M/26.4M [00:13<00:00, 1.99MB/s]


Extracting data\FashionMNIST\raw\train-images-idx3-ubyte.gz to data\FashionMNIST\raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz to data\FashionMNIST\raw\train-labels-idx1-ubyte.gz


100%|██████████████████████████████████████████████████████████████████████████████| 29.5k/29.5k [00:00<00:00, 180kB/s]


Extracting data\FashionMNIST\raw\train-labels-idx1-ubyte.gz to data\FashionMNIST\raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz to data\FashionMNIST\raw\t10k-images-idx3-ubyte.gz


100%|█████████████████████████████████████████████████████████████████████████████| 4.42M/4.42M [00:02<00:00, 2.02MB/s]


Extracting data\FashionMNIST\raw\t10k-images-idx3-ubyte.gz to data\FashionMNIST\raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz to data\FashionMNIST\raw\t10k-labels-idx1-ubyte.gz


100%|█████████████████████████████████████████████████████████████████████████████████████| 5.15k/5.15k [00:00<?, ?B/s]

Extracting data\FashionMNIST\raw\t10k-labels-idx1-ubyte.gz to data\FashionMNIST\raw






In [49]:
type(training_data)

torchvision.datasets.mnist.FashionMNIST

In [50]:
batch_size = 64

# Create data loaders.
train_dataloader = DataLoader(training_data, batch_size=batch_size)
test_dataloader = DataLoader(test_data, batch_size=batch_size)

for X, y in test_dataloader:
    print("Shape of X [N, C, H, W]: ", X.shape)
    print("Shape of y: ", y.shape, y.dtype)
    # print(X)
    # print(y)
    break

Shape of X [N, C, H, W]:  torch.Size([64, 1, 28, 28])
Shape of y:  torch.Size([64]) torch.int64


In [51]:
# Get cpu or gpu device for training.
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using {device} device")

Using cpu device


In [52]:
# Define model
class NeuralNetwork(nn.Module):
    def __init__(self):
        super(NeuralNetwork, self).__init__()
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(28*28, 512),
            nn.ReLU(),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Linear(512, 10)
        )

    def forward(self, x):
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits

model = NeuralNetwork().to(device)
print(model)

NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)


In [53]:
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)

In [54]:
def train(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)
    model.train()
    for batch, (X, y) in enumerate(dataloader):
        X, y = X.to(device), y.to(device)

        # Compute prediction error
        pred = model(X)
        loss = loss_fn(pred, y)

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if batch % 100 == 0:
            loss, current = loss.item(), batch * len(X)
            print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")

In [55]:
def test(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    model.eval()
    test_loss, correct = 0, 0
    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()
    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

In [56]:
epochs = 5
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train(train_dataloader, model, loss_fn, optimizer)
    test(test_dataloader, model, loss_fn)
print("Done!")

Epoch 1
-------------------------------
loss: 2.302841  [    0/60000]
loss: 2.291980  [ 6400/60000]
loss: 2.273289  [12800/60000]
loss: 2.268360  [19200/60000]
loss: 2.252009  [25600/60000]
loss: 2.221667  [32000/60000]
loss: 2.238112  [38400/60000]
loss: 2.196827  [44800/60000]
loss: 2.196028  [51200/60000]
loss: 2.160621  [57600/60000]
Test Error: 
 Accuracy: 38.8%, Avg loss: 2.156242 

Epoch 2
-------------------------------
loss: 2.165525  [    0/60000]
loss: 2.155755  [ 6400/60000]
loss: 2.100467  [12800/60000]
loss: 2.113699  [19200/60000]
loss: 2.060928  [25600/60000]
loss: 2.007727  [32000/60000]
loss: 2.034311  [38400/60000]
loss: 1.952234  [44800/60000]
loss: 1.957839  [51200/60000]
loss: 1.878014  [57600/60000]
Test Error: 
 Accuracy: 59.0%, Avg loss: 1.882395 

Epoch 3
-------------------------------
loss: 1.915700  [    0/60000]
loss: 1.886109  [ 6400/60000]
loss: 1.770703  [12800/60000]
loss: 1.802860  [19200/60000]
loss: 1.694619  [25600/60000]
loss: 1.652606  [32000/600

In [None]:
#save model
torch.save(model.state_dict(), "model.pth")
print("Saved PyTorch Model State to model.pth")

In [None]:
#load model
model = NeuralNetwork()
model.load_state_dict(torch.load("model.pth"))

In [None]:
## Prediction

classes = [
    "T-shirt/top",
    "Trouser",
    "Pullover",
    "Dress",
    "Coat",
    "Sandal",
    "Shirt",
    "Sneaker",
    "Bag",
    "Ankle boot",
]

model.eval()
x, y = test_data[0][0], test_data[0][1]
with torch.no_grad():
    pred = model(x)
    predicted, actual = classes[pred[0].argmax(0)], classes[y]
    print(f'Predicted: "{predicted}", Actual: "{actual}"')