## 1 Basics

In [1]:
import torch

In [2]:
torch.__version__

'2.8.0+cu128'

## 2 Understanding tensors

In [3]:
tensor0d = torch.tensor(1)
tensor1d = torch.tensor([1, 2, 3])
print(tensor1d.dtype)

torch.int64


## 3 Seeing models as computation graphs

## 4 Automatic differentiation made easy

In [4]:
import torch.nn.functional as F
from torch.autograd import grad

y = torch.tensor([1.0])
x1 = torch.tensor([1.1])
w1 = torch.tensor([2.2], requires_grad=True)
b = torch.tensor([0.0], requires_grad=True)

z = x1 * w1 + b
a = torch.sigmoid(z)

loss = F.binary_cross_entropy(a, y)

grad_L_w1 = grad(loss, w1, retain_graph=True)
grad_L_b = grad(loss, b, retain_graph=True)

In [5]:
print(grad_L_w1)
print(grad_L_b)

(tensor([-0.0898]),)
(tensor([-0.0817]),)


In [6]:
loss.backward()
print(w1.grad)
print(b.grad)

tensor([-0.0898])
tensor([-0.0817])


## 5 Implementing multilayer neural networks

In [7]:
class NeuralNetwork(torch.nn.Module):
    def __init__(self, num_inputs, num_outputs):
        super().__init__()

        self.layers = torch.nn.Sequential(

            # 1st hidden layer
            torch.nn.Linear(num_inputs, 30),
            torch.nn.ReLU(),

            # 2rd hidden layer
            torch.nn.Linear(30, 20),
            torch.nn.ReLU(),

            # output layer
            torch.nn.Linear(20, num_outputs),
        )

    def forward(self, x):
        logits = self.layers(x)
        return logits


In [8]:
model = NeuralNetwork(50, 3)

In [9]:
print(model)

NeuralNetwork(
  (layers): Sequential(
    (0): Linear(in_features=50, out_features=30, bias=True)
    (1): ReLU()
    (2): Linear(in_features=30, out_features=20, bias=True)
    (3): ReLU()
    (4): Linear(in_features=20, out_features=3, bias=True)
  )
)


In [10]:
num_params = sum(
    p.numel() for p in model.parameters() if p.requires_grad
)
print("Total number of trainale model parameters:", num_params)

Total number of trainale model parameters: 2213


In [11]:
print(model.layers[0].weight)

Parameter containing:
tensor([[ 5.9422e-02,  5.5118e-02,  6.2693e-02,  ...,  1.4865e-03,
         -1.3662e-02, -5.3007e-02],
        [ 1.7445e-02,  7.4023e-03,  4.5821e-02,  ..., -8.6331e-02,
         -2.7484e-03,  1.7355e-02],
        [-1.0053e-01,  7.3779e-02, -9.4147e-02,  ..., -3.0979e-02,
         -1.5908e-02, -7.3671e-02],
        ...,
        [ 5.9256e-02, -2.6378e-02, -7.0677e-02,  ...,  8.4910e-02,
         -1.1988e-01, -2.7071e-02],
        [ 5.5398e-05, -9.2440e-02,  9.3009e-02,  ...,  4.1270e-02,
         -1.4800e-02, -8.2596e-02],
        [-4.8552e-02, -1.0851e-01, -7.0605e-02,  ...,  1.2671e-01,
         -1.6953e-04, -6.1297e-02]], requires_grad=True)


In [12]:
print(model.layers[0].weight.shape)

torch.Size([30, 50])


In [13]:
print(model.layers[0].bias)
print(model.layers[0].bias.shape)

Parameter containing:
tensor([-1.3754e-01,  8.6949e-02, -1.3905e-01,  9.1665e-02,  4.8055e-02,
        -1.1450e-01,  3.0235e-02,  1.0238e-01,  1.4847e-02, -1.8592e-02,
        -4.0450e-02,  1.3827e-02,  2.1626e-02,  6.2188e-02,  6.8854e-02,
         3.9338e-03, -1.3456e-03, -2.2758e-02,  3.6728e-02,  1.1699e-01,
        -1.0772e-01, -1.3267e-01, -7.1864e-02,  5.0572e-02, -1.2540e-01,
        -3.6188e-02, -1.6197e-02,  9.8944e-05,  1.3698e-02,  1.5178e-02],
       requires_grad=True)
torch.Size([30])


In [14]:
torch.manual_seed(123)

model = NeuralNetwork(50, 3)
print(model.layers[0].weight)

Parameter containing:
tensor([[-0.0577,  0.0047, -0.0702,  ...,  0.0222,  0.1260,  0.0865],
        [ 0.0502,  0.0307,  0.0333,  ...,  0.0951,  0.1134, -0.0297],
        [ 0.1077, -0.1108,  0.0122,  ...,  0.0108, -0.1049, -0.1063],
        ...,
        [-0.0787,  0.1259,  0.0803,  ...,  0.1218,  0.1303, -0.1351],
        [ 0.1359,  0.0175, -0.0673,  ...,  0.0674,  0.0676,  0.1058],
        [ 0.0790,  0.1343, -0.0293,  ...,  0.0344, -0.0971, -0.0509]],
       requires_grad=True)


In [15]:
torch.manual_seed(123)

X = torch.rand((1, 50))
out = model(X)
print(out)

tensor([[-0.1262,  0.1080, -0.1792]], grad_fn=<AddmmBackward0>)


In [16]:
with torch.no_grad():
    out = model(X)
print(out)

tensor([[-0.1262,  0.1080, -0.1792]])


In [17]:
with torch.no_grad():
    out = torch.softmax(model(X), dim=1)
print(out)

tensor([[0.3113, 0.3934, 0.2952]])


## 6 Setting up efficient data loaders

In [18]:
X_train = torch.tensor([
    [-1.2, 3.1],
    [-0.9, 2.9],
    [-0.5, 2.6],
    [2.3, -1.1],
    [2.7, -1.5]
])

y_train = torch.tensor([0, 0, 0, 1, 1])

X_test = torch.tensor([
    [-0.8, 2.8],
    [2.6, -1.6]
])

y_test = torch.tensor([0, 1])

In [19]:
from torch.utils.data import Dataset

class ToyDataset(Dataset):
    def __init__(self, X, y):
        self.features = X 
        self.labels = y

    def __getitem__(self, index):
        one_x = self.features[index]
        one_y = self.labels[index]
        return one_x, one_y
    
    def __len__(self):
        return self.labels.shape[0]

train_ds = ToyDataset(X_train, y_train)
test_ds = ToyDataset(X_test, y_test)

In [20]:
print(len(train_ds))
print(train_ds[1])

5
(tensor([-0.9000,  2.9000]), tensor(0))


In [21]:
from torch.utils.data import DataLoader

torch.manual_seed(123)

train_loader = DataLoader(
    dataset = train_ds,
    batch_size = 2,
    shuffle = True,
    num_workers = 0,
    drop_last = True

)

In [22]:
test_ds = ToyDataset(X_test, y_test)

test_loader = DataLoader(
    dataset = test_ds,
    batch_size = 2,
    shuffle = False,
    num_workers = 0,
    drop_last = True
)

In [23]:
for idx, (x, y) in enumerate(train_loader):
    print(f'Batch {idx + 1}', x, y)

Batch 1 tensor([[ 2.3000, -1.1000],
        [-0.9000,  2.9000]]) tensor([1, 0])
Batch 2 tensor([[-1.2000,  3.1000],
        [-0.5000,  2.6000]]) tensor([0, 0])


## 7 A typical training loop

In [24]:
import torch.nn.functional as F 

torch.manual_seed(123)
model = NeuralNetwork(num_inputs=2, num_outputs=2)
optimizer = torch.optim.SGD(model.parameters(), lr=0.5)


num_epochs = 3

for epoch in range(num_epochs):


    model.train()
    for batch_idx, (features, labels) in enumerate(train_loader):

        logits = model(features)


        loss = F.cross_entropy(logits, labels)


        optimizer.zero_grad()
        loss.backward()
        optimizer.step()


        ### LOADING
        print(f'Epoch {epoch+1:03d}/{num_epochs:03d}'
              f' | Batch {batch_idx:03d}/{len(train_loader):03d}'
              f' | Train/Val Loss: {loss:.2f}')
        
    model.eval()
    # Optional model evaluation


Epoch 001/003 | Batch 000/002 | Train/Val Loss: 0.75
Epoch 001/003 | Batch 001/002 | Train/Val Loss: 0.65
Epoch 002/003 | Batch 000/002 | Train/Val Loss: 0.44
Epoch 002/003 | Batch 001/002 | Train/Val Loss: 0.13
Epoch 003/003 | Batch 000/002 | Train/Val Loss: 0.03
Epoch 003/003 | Batch 001/002 | Train/Val Loss: 0.00


In [25]:
model.eval()

with torch.no_grad():
    outputs = model(X_train)

print(outputs)

tensor([[ 2.8569, -4.1618],
        [ 2.5382, -3.7548],
        [ 2.0944, -3.1820],
        [-1.4814,  1.4816],
        [-1.7176,  1.7342]])


In [26]:
torch.set_printoptions(sci_mode=False)
probas = torch.softmax(outputs, dim=1)
print(probas)

tensor([[    0.9991,     0.0009],
        [    0.9982,     0.0018],
        [    0.9949,     0.0051],
        [    0.0491,     0.9509],
        [    0.0307,     0.9693]])


In [27]:
predictions = torch.argmax(probas, dim=1)
print(predictions)

tensor([0, 0, 0, 1, 1])


In [28]:
predictions = torch.argmax(outputs, dim=1)
print(predictions)

tensor([0, 0, 0, 1, 1])


In [29]:
predictions == y_train

tensor([True, True, True, True, True])

In [30]:
torch.sum(predictions == y_train)

tensor(5)

In [31]:
def compute_accuracy(model, dataloader):

    model = model.eval()
    correct = 0.0
    total_examples = 0


    for idx, (features, labels) in enumerate(dataloader):

        with torch.no_grad():
            logits = model(features)

        
        predictions = torch.argmax(logits, dim=1)
        compare = predictions==labels
        correct += torch.sum(compare)
        total_examples += len(compare)

    return (correct / total_examples).item()

        

In [32]:
compute_accuracy(model, test_loader)

1.0

In [33]:
compute_accuracy(model, train_loader)

1.0

## 8 Saving and loading models

In [34]:
torch.save(model.state_dict(), "model.pth")

In [35]:
model = NeuralNetwork(2, 2)
model.load_state_dict(torch.load("model.pth", weights_only=True))

<All keys matched successfully>

## 9 Optimizing training performance with GPUs 

###  9.1 Pytorch computations on GPU devices

In [37]:
print(torch.cuda.is_available)

<function is_available at 0x71cce465fce0>


In [39]:
tensor_1 = torch.tensor([1, 2, 3])
tensor_2 = torch.tensor([4, 5, 6])
print(tensor_1 + tensor_2)

tensor([5, 7, 9])


In [41]:
tensor_1 = tensor_1.to('cuda')
tensor_2 = tensor_2.to('cuda')
print(tensor_1 + tensor_2)

tensor([5, 7, 9], device='cuda:0')


### 9.2  Single-GPU training

In [43]:
torch.manual_seed(123)
model = NeuralNetwork(num_inputs=2, num_outputs=2)


device = torch.device('cuda')
model.to(device)


optimizer = torch.optim.SGD(model.parameters(), lr=0.5)


num_epochs = 3


for epoch in range(num_epochs):


    model.train()
    for barch_idx, (features, labels) in enumerate(train_loader):

        features, labels = features.to(device), labels.to(device)
        logits = model(features)
        loss = F.cross_entropy(logits, labels)

        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()


        ### LOADING
        print(f"Epoch: {epoch+1:03d}/{num_epochs:03d}"
              f" | Batch {batch_idx:03d}/{len(train_loader):03d}"
              f" | Train/Val Loss: {loss:.2f}")
        
    model.eval()

Epoch: 001/003 | Batch 001/002 | Train/Val Loss: 0.75
Epoch: 001/003 | Batch 001/002 | Train/Val Loss: 0.65
Epoch: 002/003 | Batch 001/002 | Train/Val Loss: 0.44
Epoch: 002/003 | Batch 001/002 | Train/Val Loss: 0.13
Epoch: 003/003 | Batch 001/002 | Train/Val Loss: 0.03
Epoch: 003/003 | Batch 001/002 | Train/Val Loss: 0.00


### 9.3 Training with multiple GPUs
