In [1]:
import torch
import torch.nn as nn

In [2]:
## Complete model (Lazy method)
# torch.save(model, PATH)

# load
# model = torch.load(PATH)
# model.eval()

In [3]:
# recommended way of saving
# only saving parameters
# torch.save(model.state_dict(), PATH)
# model = Model(*args, **kwargs)
# model.load_state_dict(torch.load(PATH))
# model.eval()

In [4]:
class Model(nn.Module):
    """Minimal binary classifier: one linear layer followed by a sigmoid."""

    def __init__(self, n_input_features):
        """Create the model.

        Args:
            n_input_features: size of each input sample fed to the linear layer.
        """
        super().__init__()
        # Single output unit. The attribute name `linear` determines the
        # state-dict keys (`linear.weight`, `linear.bias`).
        self.linear = nn.Linear(in_features=n_input_features, out_features=1)

    def forward(self, x):
        """Return the sigmoid of the linear layer's output for `x`."""
        logits = self.linear(x)
        return torch.sigmoid(logits)
    
# Build a fresh model whose parameters are saved below.
model = Model(n_input_features=6)

# "Lazy" method: serialize the entire module object in one call.
FILE = 'model.pth'  # .pth is the conventional extension for PyTorch files
torch.save(model, FILE)

# Reading the file back yields a ready-to-use module; no constructor call needed.
# NOTE(review): this unpickles arbitrary objects -- only load files you trust.
# Newer PyTorch releases default torch.load to weights_only=True, in which case
# loading a whole pickled module needs weights_only=False -- confirm for your version.
loaded_model = torch.load(FILE)
loaded_model.eval()

# Show each restored parameter tensor.
for restored in loaded_model.parameters():
    print(restored)


Parameter containing:
tensor([[-0.3883, -0.3417,  0.1780, -0.3056,  0.1198,  0.3989]],
       requires_grad=True)
Parameter containing:
tensor([0.0196], requires_grad=True)


In [5]:
# Recommended method: persist only the parameters (the state dict).
FILE2 = 'model2.pth'  # .pth is the conventional extension for PyTorch files
torch.save(model.state_dict(), FILE2)

# Loading: read the saved parameters, rebuild the architecture, then copy them in.
saved_state = torch.load(FILE2)
loaded_model2 = Model(n_input_features=6)
loaded_model2.load_state_dict(saved_state)
loaded_model2.eval()

# The printed values should match the parameters of `model`.
for restored in loaded_model2.parameters():
    print(restored)

Parameter containing:
tensor([[-0.3883, -0.3417,  0.1780, -0.3056,  0.1198,  0.3989]],
       requires_grad=True)
Parameter containing:
tensor([0.0196], requires_grad=True)


In [6]:
# Show the state dict: an ordered mapping from parameter name to tensor.
print(model.state_dict())

OrderedDict([('linear.weight', tensor([[-0.3883, -0.3417,  0.1780, -0.3056,  0.1198,  0.3989]])), ('linear.bias', tensor([0.0196]))])


## Saving a checkpoint during training

In [14]:
# Set up a model and optimizer as they would exist part-way through training.
model = Model(n_input_features=6)
learning_rate = 0.01
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

print(optimizer.state_dict())

# A checkpoint bundles what is needed to resume training: the epoch reached
# plus the model and optimizer state dicts.
CHECKPOINT_FILE = "checkpoint.pth"
checkpoint = {
    "epoch": 90,  # placeholder epoch number for this demo
    "model_state": model.state_dict(),
    "optimizer_state": optimizer.state_dict(),
}

torch.save(checkpoint, CHECKPOINT_FILE)

# --- resume from the checkpoint ---
loaded_checkpoint = torch.load(CHECKPOINT_FILE)
epoch = loaded_checkpoint["epoch"]

# The model and optimizer must be constructed again before their state can be restored.
model = Model(n_input_features=6)
optimizer = torch.optim.SGD(model.parameters(), lr=0)  # lr doesn't matter, correct one will load

# Restore from the checkpoint read back from disk (not the in-memory dict built
# above), so this cell actually exercises the save/load round trip.
model.load_state_dict(loaded_checkpoint["model_state"])
optimizer.load_state_dict(loaded_checkpoint["optimizer_state"])

print(optimizer.state_dict())

{'state': {}, 'param_groups': [{'lr': 0.01, 'momentum': 0, 'dampening': 0, 'weight_decay': 0, 'nesterov': False, 'params': [0, 1]}]}
{'state': {}, 'param_groups': [{'lr': 0.01, 'momentum': 0, 'dampening': 0, 'weight_decay': 0, 'nesterov': False, 'params': [0, 1]}]}


##### Things to keep in mind if using a GPU 

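- when loading on a different device than the one used for saving, pass `map_location=device` to `torch.load` (it is an argument of `torch.load`, not of `load_state_dict`)
- load the state dict into the model first, then move the model to the CUDA device with `model.to(device)`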