In [1]:
# https://github.com/patrickloeber/pytorchTutorial/blob/master/17_save_load.py

import torch
import torch.nn as nn

# 3 DIFFERENT METHODS TO REMEMBER
- torch.save(arg, PATH) # can be model, tensor, or dictionary
- torch.load(PATH)
- torch.load_state_dict(arg)

# 2 DIFFERENT WAYS OF SAVING

In [2]:
# 1. lazy way: save whole model
# torch.save(model, PATH)

# model class must be defined somewhere
# model = torch.load(PATH)
# model.eval()

# 2. recommended way: save only the state_dict
# torch.save(model.state_dict(), PATH)

# model must be created again with parameters
# model = Model(*args, **kwargs)
# model.load_state_dict(torch.load(PATH))
# model.eval()

In [3]:
class Model(nn.Module):
    def __init__(self, n_input_features):
        super(Model, self).__init__()
        self.linear = nn.Linear(n_input_features, 1)
        
    def forward(self, x):
        y_pred = torch.sigmoid(self.linear(x))
        return y_pred

model = Model(n_input_features=6)

In [7]:
# train your model

# save all
for param in model.parameters():
    print(param)
    
# save and load entire model
FILE = "model.pth"
torch.save(model, FILE)

loaded_model = torch.load(FILE)
loaded_model.eval()

for param in loaded_model.parameters():
    print(param)
    
# save only state dict
torch.save(model.state_dict(), FILE)

print(model.state_dict())
loaded_model = Model(n_input_features=6)
loaded_model.load_state_dict(torch.load(FILE)) # it takes the loaded dictionary, not the path file itself
loaded_model.eval()

print(loaded_model.state_dict())

Parameter containing:
tensor([[-0.1153, -0.1479, -0.0494,  0.3141, -0.3259,  0.3101]],
       requires_grad=True)
Parameter containing:
tensor([0.0738], requires_grad=True)
Parameter containing:
tensor([[-0.1153, -0.1479, -0.0494,  0.3141, -0.3259,  0.3101]],
       requires_grad=True)
Parameter containing:
tensor([0.0738], requires_grad=True)
OrderedDict([('linear.weight', tensor([[-0.1153, -0.1479, -0.0494,  0.3141, -0.3259,  0.3101]])), ('linear.bias', tensor([0.0738]))])
OrderedDict([('linear.weight', tensor([[-0.1153, -0.1479, -0.0494,  0.3141, -0.3259,  0.3101]])), ('linear.bias', tensor([0.0738]))])


In [9]:
# load checkpoint
lr = 0.001
optimizer = torch.optim.SGD(model.parameters(), lr=lr)

checkpoint = {
    "epoch": 90,
    "model_state": model.state_dict(),
    "optim_state": optimizer.state_dict()
}
print(optimizer.state_dict())
FILE = "checkpoint.pth"
torch.save(checkpoint, FILE)

model = Model(n_input_features=6)
optimizer = torch.optim.SGD(model.parameters(), lr=0)

checkpoint = torch.load(FILE)
model.load_state_dict(checkpoint["model_state"])
optimizer.load_state_dict(checkpoint["optim_state"])
epoch = checkpoint["epoch"]

model.eval()
# or
# model.train()

print(optimizer.state_dict())

{'state': {}, 'param_groups': [{'lr': 0.001, 'momentum': 0, 'dampening': 0, 'weight_decay': 0, 'nesterov': False, 'maximize': False, 'foreach': None, 'differentiable': False, 'params': [0, 1]}]}
{'state': {}, 'param_groups': [{'lr': 0.001, 'momentum': 0, 'dampening': 0, 'weight_decay': 0, 'nesterov': False, 'maximize': False, 'foreach': None, 'differentiable': False, 'params': [0, 1]}]}


In [10]:
# remember that you must call model.eval() to set dropout and batch normalization layers 
# to evaluation mode before running inference. failing to do this will yield 
# inconsistent inference results. if you wish to resuming training, 
# call model.train() to ensure these layers are in training mode.

# SAVING ON GPU/CPU

In [11]:
# 1) save on GPU, load on CPU
device = torch.device("cuda")
model.to(device)
torch.save(model.state_dict(), PATH)

device = torch.device("cpu")
model = Model(*args, **kwargs)
model.load_state_dict(torch.load(PATH, map_location=device))

# 2) save on GPU, load on GPU
device = torch.device("cuda")
model.to(device)
torch.save(model.state_dict(), PATH)

model = Model(*args, **kwargs)
model.load_state_dict(torch.load(PATH))
model.to(device)

# note: be sure to use the .to(torch.device("cuda")) function 
# on all model inputs, too!

# 3. save on CPU, load on GPU
torch.save(model.state_dict(), PATH)

device = torch.device("cuda")
model = Model(*args, **kwargs)
model.load_state_dict(torch.load(PATH, map_location="cuda:0"))  # choose whatever GPU device number you want
model.to(device)

# this loads the model to a given GPU device
# next, be sure to call model.to(torch.device("cuda")) to convert the model’s parameter tensors to CUDA tensors

AssertionError: Torch not compiled with CUDA enabled