In [1]:
import torch
import torch.nn as nn

In [None]:
## Lazy option: pickle the complete model object
# Template only -- `model` and `PATH` are placeholders that this cell does not define.
# torch.save accepts tensors, dictionaries and whole models and serializes them with pickle.
torch.save(model, PATH)

# The model class must be defined (importable) wherever the file is loaded.
# A fully pickled model cannot be restored by the weights-only unpickler, which is
# the default from PyTorch 2.6 on, so opt out explicitly.
# SECURITY: full unpickling can execute arbitrary code -- only load files you trust.
model = torch.load(PATH, weights_only=False)
model.eval()  # switch to evaluation mode before running inference

In [None]:
## Recommended option: save only the state dict
# Template only -- `model`, `PATH`, `args` and `kwargs` are placeholders that this
# cell does not define.
torch.save(model.state_dict(), PATH)

# The model must be constructed again with the same arguments before loading.
model = Model(*args, **kwargs)
# A state dict holds only tensors in plain containers, so the restricted
# (weights-only) unpickler is sufficient and avoids executing pickled code.
model.load_state_dict(torch.load(PATH, weights_only=True))
model.eval()  # switch to evaluation mode before running inference

In [2]:
# Model definition used by the save/load examples
class Model(nn.Module):
    """One linear layer followed by a sigmoid.

    Args:
        n_input_features: size of the last dimension of the input tensor.
    """

    def __init__(self, n_input_features):
        super().__init__()
        # Maps n_input_features inputs to a single output value.
        self.linear = nn.Linear(in_features=n_input_features, out_features=1)

    def forward(self, x):
        """Return sigmoid(linear(x)), i.e. values squashed into (0, 1)."""
        logits = self.linear(x)
        return torch.sigmoid(logits)


model = Model(n_input_features=6)

In [5]:
## LAZY WAY - complete model
# train your model (training is omitted here), then pickle the whole object
FILE = 'model.pth'
torch.save(model, FILE)

# load the model
# The file holds a fully pickled nn.Module, which the weights-only unpickler
# (the default from PyTorch 2.6 on) refuses to load, so opt out explicitly.
# SECURITY: full unpickling can execute arbitrary code -- only load trusted files.
model = torch.load(FILE, weights_only=False)
model.eval()

# Show the restored parameters.
for param in model.parameters():
    print(param)

Parameter containing:
tensor([[-0.3763, -0.2753, -0.2828,  0.1923,  0.0678, -0.0012]],
       requires_grad=True)
Parameter containing:
tensor([-0.3032], requires_grad=True)


In [8]:
## RECOMMENDED WAY - state_dict()
FILE = 'model_state_dict.pth'
torch.save(model.state_dict(), FILE)

# model must be created again with parameters
loaded_model = Model(n_input_features=6)
# A state dict contains only tensors, so the restricted unpickler is enough.
loaded_model.load_state_dict(torch.load(FILE, weights_only=True))
loaded_model.eval()

# Inspect the *reloaded* copy: printing `model` here would only echo the
# weights that were saved and would not show that loading worked.
for param in loaded_model.parameters():
    print(param)

print(loaded_model.state_dict())

Parameter containing:
tensor([[-0.3763, -0.2753, -0.2828,  0.1923,  0.0678, -0.0012]],
       requires_grad=True)
Parameter containing:
tensor([-0.3032], requires_grad=True)
OrderedDict([('linear.weight', tensor([[-0.3763, -0.2753, -0.2828,  0.1923,  0.0678, -0.0012]])), ('linear.bias', tensor([-0.3032]))])


In [16]:
## CHECKPOINTS

learning_rate = 0.01
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
print(optimizer.state_dict())

# Everything needed to resume training: epoch counter, model weights and
# optimizer state.
checkpoint = {
    "epoch": 90,
    "model_state": model.state_dict(),
    "optim_state": optimizer.state_dict()
}

# Write the checkpoint so the load below works on a fresh kernel instead of
# depending on a file left behind by an earlier session.
CHECKPOINT_FILE = "checkpoint.pth"
torch.save(checkpoint, CHECKPOINT_FILE)

# The checkpoint holds only tensors and plain Python containers/scalars, so the
# restricted unpickler can read it.
loaded_checkpoint = torch.load(CHECKPOINT_FILE, weights_only=True)
epoch = loaded_checkpoint['epoch']

# Rebuild fresh objects; lr=0 is a deliberate dummy value that the loaded
# optimizer state overwrites (the second print shows the saved lr again).
model = Model(n_input_features=6)
optimizer = torch.optim.SGD(model.parameters(), lr=0)

# Restore from what was read back from disk, not from the in-memory dict,
# so the save/load round trip is actually exercised.
model.load_state_dict(loaded_checkpoint['model_state'])
optimizer.load_state_dict(loaded_checkpoint['optim_state'])

print(optimizer.state_dict())

{'state': {}, 'param_groups': [{'lr': 0.01, 'momentum': 0, 'dampening': 0, 'weight_decay': 0, 'nesterov': False, 'maximize': False, 'foreach': None, 'differentiable': False, 'params': [0, 1]}]}
{'state': {}, 'param_groups': [{'lr': 0.01, 'momentum': 0, 'dampening': 0, 'weight_decay': 0, 'nesterov': False, 'maximize': False, 'foreach': None, 'differentiable': False, 'params': [0, 1]}]}


In [None]:
""" SAVING ON GPU/CPU 

# 1) Save on GPU, Load on CPU
device = torch.device("cuda")
model.to(device)
torch.save(model.state_dict(), PATH)

device = torch.device('cpu')
model = Model(*args, **kwargs)
model.load_state_dict(torch.load(PATH, map_location=device))

# 2) Save on GPU, Load on GPU
device = torch.device("cuda")
model.to(device)
torch.save(model.state_dict(), PATH)

model = Model(*args, **kwargs)
model.load_state_dict(torch.load(PATH))
model.to(device)

# Note: Be sure to use the .to(torch.device('cuda')) function 
# on all model inputs, too!

# 3) Save on CPU, Load on GPU
torch.save(model.state_dict(), PATH)

device = torch.device("cuda")
model = Model(*args, **kwargs)
model.load_state_dict(torch.load(PATH, map_location="cuda:0"))  # Choose whatever GPU device number you want
model.to(device)

# This loads the model to a given GPU device. 
# Next, be sure to call model.to(torch.device('cuda')) to convert the model’s parameter tensors to CUDA tensors
"""