### Save and Load Models

In [2]:
import torch
import torch.nn as nn

In [None]:
# Reference sketch of the two save/load approaches.
# NOTE(review): PATH, args and kwargs are placeholders that no cell shown above
# defines, so this cell is an API outline rather than something to execute.

# saving the model
torch.save(args, PATH) # `args` stands for the object being saved (e.g. a model or a dictionary); the result is serialized, not human readable.
# loading the model
model = torch.load(PATH)
model.eval()

#### STATE DICT ####
# saving only the model's parameters (its state dict)

torch.save(model.state_dict(), PATH)
# the model object must be created again before the saved parameters can be loaded into it

model = Model(*args, **kwargs) # create model object
model.load_state_dict(torch.load(PATH))
model.eval()

In [5]:
# creating a model class 
class Model(nn.Module):
    """Single linear unit followed by a sigmoid.

    Args:
        n_input_features: size of the last dimension of the input passed to
            ``forward``.
    """

    def __init__(self, n_input_features):
        super().__init__()
        # The attribute name `linear` determines the state-dict keys
        # ('linear.weight', 'linear.bias').
        self.linear = nn.Linear(in_features=n_input_features, out_features=1)

    def forward(self, x):
        """Return sigmoid(linear(x)); the last dimension of the result has size 1."""
        logits = self.linear(x)
        return torch.sigmoid(logits)
    
# create model
model = Model(n_input_features=6)

# training
# (this demo has no training loop; the freshly initialised weights are saved as-is)

# lazy model saving/loading: serialize the whole module object
FILE = 'model.pth'
# Save before loading so the cell works on a fresh kernel (Restart & Run All)
# instead of depending on a model.pth left over from an earlier session.
torch.save(model, FILE)

# load model
# The file contains a pickled nn.Module, not just tensors, so it has to be read
# with weights_only=False (recent PyTorch releases default to weights_only=True
# and refuse to unpickle arbitrary objects).
# WARNING: unpickling can execute arbitrary code -- only load files you trust.
model = torch.load(FILE, weights_only=False)

# set it for validation
model.eval()

# inspect the params
for param in model.parameters():
    print(param)

Parameter containing:
tensor([[ 0.0110, -0.1258, -0.1658, -0.1280,  0.2966, -0.2569]],
       requires_grad=True)
Parameter containing:
tensor([0.3263], requires_grad=True)


In [9]:
# Preferred: save only the state dict rather than the whole model object
state_to_save = model.state_dict()
torch.save(state_to_save, FILE)

# Loading: the model object has to be created first,
# then the saved state is loaded into it
loaded_model = Model(n_input_features=6)
restored_state = torch.load(FILE)
loaded_model.load_state_dict(restored_state)
loaded_model.eval()

Model(
  (linear): Linear(in_features=6, out_features=1, bias=True)
)

In [12]:
# Print every parameter tensor of the reloaded model
for tensor in loaded_model.parameters():
    print(tensor)

Parameter containing:
tensor([[ 0.0110, -0.1258, -0.1658, -0.1280,  0.2966, -0.2569]],
       requires_grad=True)
Parameter containing:
tensor([0.3263], requires_grad=True)


In [13]:
# State dict of the original model: parameter name -> tensor
model_state = model.state_dict()
print(model_state)

OrderedDict([('linear.weight', tensor([[ 0.0110, -0.1258, -0.1658, -0.1280,  0.2966, -0.2569]])), ('linear.bias', tensor([0.3263]))])


In [14]:
# State dict of the reloaded model: parameter name -> tensor
loaded_state = loaded_model.state_dict()
print(loaded_state)

OrderedDict([('linear.weight', tensor([[ 0.0110, -0.1258, -0.1658, -0.1280,  0.2966, -0.2569]])), ('linear.bias', tensor([0.3263]))])


In [15]:
# Build an SGD optimizer over the model's parameters and show its state dict
learning_rate = 0.01
optimizer = torch.optim.SGD(params=model.parameters(), lr=learning_rate)
optimizer_state = optimizer.state_dict()
print(optimizer_state)

{'state': {}, 'param_groups': [{'lr': 0.01, 'momentum': 0, 'dampening': 0, 'weight_decay': 0, 'nesterov': False, 'params': [4808609376, 4811200784]}]}


In [16]:
# Saving checkpoints: bundle the epoch counter with the model and optimizer
# state dicts in one dictionary and write it to a single file

checkpoint = dict(
    epoch=90,
    model_state=model.state_dict(),
    optim_state=optimizer.state_dict(),
)

torch.save(checkpoint, 'checkpoint.pth')

In [19]:
# loading checkpoint

loaded_checkpoint = torch.load('checkpoint.pth')
epoch = loaded_checkpoint['epoch']

# create the model object and an optimizer bound to its parameters

model_c = Model(n_input_features=6)
# lr=0 is only a placeholder: loading the optimizer state below replaces the
# param-group hyperparameters with the saved ones.
optimizer = torch.optim.SGD(model_c.parameters(), lr=0)

# Restore from the dictionary that was read back from disk (`loaded_checkpoint`),
# not from the in-memory `checkpoint` built when saving -- otherwise the file
# round-trip is never exercised and the cell fails in a kernel that only loads.
model_c.load_state_dict(loaded_checkpoint['model_state'])  # load the parameters into the model
optimizer.load_state_dict(loaded_checkpoint['optim_state'])  # load the optimizer state

print(optimizer.state_dict())
# training would continue from this point on ...

{'state': {}, 'param_groups': [{'lr': 0.01, 'momentum': 0, 'dampening': 0, 'weight_decay': 0, 'nesterov': False, 'params': [4813754928, 4835924640]}]}


In [None]:
# If the device the model was saved from differs from the device it is loaded on,
# you need to specify map_location=device when calling torch.load.
# Example: saving on GPU and loading on CPU.
# NOTE(review): PATH, args and kwargs are placeholders that no cell shown above
# defines, so this cell is a sketch rather than something to execute as-is.
# set device
device = torch.device('cuda')
model.to(device)
# save the model
torch.save(model.state_dict(),PATH)

# loading ...
device = torch.device('cpu')
model = Model(*args, **kwargs)

model.load_state_dict(torch.load(PATH, map_location=device)) # specify the map location; here it is given the CPU device