📝 **Author:** Amirhossein Heydari - 📧 **Email:** amirhosseinheydari78@gmail.com - 📍 **Linktree:** [linktr.ee/mr_pylin](https://linktr.ee/mr_pylin)

---

# Dependencies

In [2]:
import torch
from torch import nn, optim
from torchinfo import summary

# Consider An Initialized Model As Trained

In [3]:
# a tiny 4 -> 2 -> 1 multilayer perceptron with a sigmoid after each linear layer;
# its freshly initialized parameters play the role of "trained" values in this notebook
trained_model = nn.Sequential(*[
    nn.Linear(in_features=4, out_features=2),
    nn.Sigmoid(),
    nn.Linear(in_features=2, out_features=1),
    nn.Sigmoid(),
])

# log
print(trained_model)

Sequential(
  (0): Linear(in_features=4, out_features=2, bias=True)
  (1): Sigmoid()
  (2): Linear(in_features=2, out_features=1, bias=True)
  (3): Sigmoid()
)


In [4]:
# per-layer report (output shape + parameter count) for an input of shape (16, 4), evaluated on the CPU
summary(trained_model, input_size=(16, 4), device='cpu')

Layer (type:depth-idx)                   Output Shape              Param #
Sequential                               [16, 1]                   --
├─Linear: 1-1                            [16, 2]                   10
├─Sigmoid: 1-2                           [16, 2]                   --
├─Linear: 1-3                            [16, 1]                   3
├─Sigmoid: 1-4                           [16, 1]                   --
Total params: 13
Trainable params: 13
Non-trainable params: 0
Total mult-adds (Units.MEGABYTES): 0.00
Input size (MB): 0.00
Forward/backward pass size (MB): 0.00
Params size (MB): 0.00
Estimated Total Size (MB): 0.00

In [5]:
# weights and biases per layer (using model.named_parameters())
# The parameter name (e.g. "0.weight", "2.bias") says which module a tensor belongs to
# and whether it is a weight or a bias, so the labels stay correct even when a layer
# has no bias (bias=False); guessing from the index parity would mislabel such models.
layer_numbers = {}  # module name -> 1-based position among the modules that own parameters
for name, param in trained_model.named_parameters():
    module_name, _, kind = name.rpartition('.')
    layer_no = layer_numbers.setdefault(module_name, len(layer_numbers) + 1)
    label = {"weight": "weights", "bias": "biases"}.get(kind, kind)
    print(str(param).replace("Parameter containing:", f"{label} (layer {layer_no}):"), end='\n\n')

weights (layer 1):
tensor([[ 0.1658, -0.3584, -0.0057, -0.3216],
        [ 0.2297,  0.4038, -0.1256,  0.3223]], requires_grad=True)

biases (layer 1):
tensor([-0.1368, -0.4269], requires_grad=True)

weights (layer 2):
tensor([[0.7063, 0.4341]], requires_grad=True)

biases (layer 2):
tensor([0.2903], requires_grad=True)



In [6]:
# weights and biases per layer (using model.state_dict())
# each entry is a (name, tensor) pair; it is printed as a tuple, one entry per line
for name, tensor in trained_model.state_dict().items():
    print((name, tensor))

('0.weight', tensor([[ 0.1658, -0.3584, -0.0057, -0.3216],
        [ 0.2297,  0.4038, -0.1256,  0.3223]]))
('0.bias', tensor([-0.1368, -0.4269]))
('2.weight', tensor([[0.7063, 0.4341]]))
('2.bias', tensor([0.2903]))


# Save & Load
   - The extension `.pth` has no specific meaning to PyTorch internally.
   - `.pth` (or sometimes `.pt`) is used conventionally to indicate the file contains a PyTorch model or parameters.

📝 **Docs & Tutorials** 📚:
   - torch.save: [pytorch.org/docs/stable/generated/torch.save.html](https://pytorch.org/docs/stable/generated/torch.save.html)
   - torch.load: [pytorch.org/docs/stable/generated/torch.load.html](https://pytorch.org/docs/stable/generated/torch.load.html)
   - Saving and Loading Models: [pytorch.org/tutorials/beginner/saving_loading_models.html](https://pytorch.org/tutorials/beginner/saving_loading_models.html)
   - Save and Load the Model: [pytorch.org/tutorials/beginner/basics/saveloadrun_tutorial.html](https://pytorch.org/tutorials/beginner/basics/saveloadrun_tutorial.html)

### Save and Load ONLY Parameters
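   - This is the recommended approach.
   - The model architecture is defined separately in code; the saved file holds only tensors, so the code can be refactored freely as long as the parameter names and shapes still match.
   - Efficient in terms of memory and storage.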

In [None]:
# snapshot of the model's tensors as an ordered mapping: parameter name -> tensor
trained_model_parameters = trained_model.state_dict()

# write only that mapping to disk (the nn.Sequential object itself is not stored)
torch.save(trained_model_parameters, '../../assets/models/model_1.pth')

In [None]:
# read the saved mapping back; weights_only=True keeps the unpickler restricted
# to tensors and plain containers instead of arbitrary Python objects
weights = torch.load('../../assets/models/model_1.pth', weights_only=True)

# log
weights

OrderedDict([('0.weight',
              tensor([[-0.4132, -0.3434, -0.2125,  0.4822],
                      [-0.4512,  0.3472,  0.4492, -0.4743]])),
             ('0.bias', tensor([-0.4894, -0.4634])),
             ('2.weight', tensor([[-0.3045,  0.6727]])),
             ('2.bias', tensor([-0.1564]))])

In [None]:
# rebuild the same architecture first: a state_dict contains tensors only,
# so the layers that receive them have to exist already
model_1 = nn.Sequential(*[
    nn.Linear(in_features=4, out_features=2),
    nn.Sigmoid(),
    nn.Linear(in_features=2, out_features=1),
    nn.Sigmoid(),
])

# overwrite the random initialization with the loaded values
model_1.load_state_dict(weights)

# log
for name, tensor in model_1.state_dict().items():
    print((name, tensor))

('0.weight', tensor([[-0.4132, -0.3434, -0.2125,  0.4822],
        [-0.4512,  0.3472,  0.4492, -0.4743]]))
('0.bias', tensor([-0.4894, -0.4634]))
('2.weight', tensor([[-0.3045,  0.6727]]))
('2.bias', tensor([-0.1564]))


### Save & Load the ENTIRE Model
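   - ✅ Easier to use since you don’t need to redefine the model architecture.
   - ⚠️ Not portable: the whole object is pickled, so loading depends on the exact class definitions and can break across PyTorch versions or after the code is refactored.
   - ⚠️ Loading requires `weights_only=False`, which can execute arbitrary code while unpickling, so only load files from a trusted source.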

In [None]:
# save: the whole nn.Sequential object (architecture + parameters) is serialized
torch.save(obj=trained_model, f='../../assets/models/model_2.pth')

# load: weights_only=False turns on full unpickling, which a complete module needs
# NOTE: full unpickling can run arbitrary code, so only load files you trust
model_2 = torch.load(f='../../assets/models/model_2.pth', weights_only=False)

# log
model_2

Sequential(
  (0): Linear(in_features=4, out_features=2, bias=True)
  (1): Sigmoid()
  (2): Linear(in_features=2, out_features=1, bias=True)
  (3): Sigmoid()
)

In [None]:
# log: one (name, tensor) pair per line
print(*model_2.state_dict().items(), sep='\n')

('0.weight', tensor([[-0.4132, -0.3434, -0.2125,  0.4822],
        [-0.4512,  0.3472,  0.4492, -0.4743]]))
('0.bias', tensor([-0.4894, -0.4634]))
('2.weight', tensor([[-0.3045,  0.6727]]))
('2.bias', tensor([-0.1564]))


### Saving & Loading a General Checkpoint for Inference and/or Resuming Training
   - You can save a checkpoint at any point during training, for example at the end of every epoch.

In [None]:
# objects that are stored in the checkpoint alongside the model parameters
optimizer = optim.SGD(trained_model.parameters(), lr=1e-2)  # plain SGD, no momentum argument
criterion = nn.MSELoss()
epoch = 10

In [None]:
# save both model and optimizer state_dict for resuming training
# (everything is bundled into one dict, so a single file holds the whole checkpoint)
torch.save(
    obj={
        'model_state_dict': trained_model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'epoch': epoch,  # Save the epoch to resume training
        'criterion': criterion  # Optional: this is the loss *module* (nn.MSELoss), not the last loss value
    },
    f='../../assets/models/model_3.pth'
)

In [None]:
# load the checkpoint
# weights_only=False: the file also holds a pickled loss module, not just tensors and numbers
# NOTE: full unpickling can run arbitrary code, so only load checkpoints you trust
checkpoint = torch.load(f='../../assets/models/model_3.pth', weights_only=False)

# model: recreate the architecture, then restore its parameters
model_3 = nn.Sequential(*[
    nn.Linear(in_features=4, out_features=2),
    nn.Sigmoid(),
    nn.Linear(in_features=2, out_features=1),
    nn.Sigmoid(),
])
model_3.load_state_dict(checkpoint['model_state_dict'])

# optimizer: bind a new one to model_3's parameters, then restore its saved state
optimizer = optim.SGD(params=model_3.parameters(), lr=1e-2)
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])

# remaining bookkeeping values
epoch = checkpoint['epoch']
criterion = checkpoint['criterion']

In [None]:
# log: one (name, tensor) pair per line
print(*model_3.state_dict().items(), sep='\n')

('0.weight', tensor([[-0.4132, -0.3434, -0.2125,  0.4822],
        [-0.4512,  0.3472,  0.4492, -0.4743]]))
('0.bias', tensor([-0.4894, -0.4634]))
('2.weight', tensor([[-0.3045,  0.6727]]))
('2.bias', tensor([-0.1564]))


In [None]:
# log: printing the optimizer lists the hyperparameters of each parameter group
print(optimizer)

SGD (
Parameter Group 0
    dampening: 0
    differentiable: False
    foreach: None
    fused: None
    lr: 0.01
    maximize: False
    momentum: 0
    nesterov: False
    weight_decay: 0
)


In [None]:
# log: the epoch counter read back from the checkpoint
print(f"epoch : {epoch}")

epoch : 10
