# Save and Load Models

(based on a tutorial by Python Engineer in Youtube)


## 2 DIFFERENT WAYS OF SAVING

### 1) lazy way: save whole model

disad: serialized data

### 2) recommended way: save only the state_dict

If we want to save our model and using then for inference, it'd be enough only to save the parameters.

Example:

## First way

In [6]:
import torch
import torch.nn as nn

In [16]:
class Model(nn.Module):
    def __init__(self, n_input_features):
        super(Model, self).__init__()
        self.linear = nn.Linear(n_input_features, 1)
        
    def forward(self, x):
        y_pred = torch.sigmoid(self.linear(x))
        return y_pred
    

In [17]:
model = Model(n_input_features=6)

In [18]:
#train your model
#lazy method

FILE = "Model.pth" #pth is short for pytorch, it's common
torch.save(model, FILE)

In [19]:
#load model
model = torch.load(FILE)
model.eval()

for param in model.parameters():
    print(param)

Parameter containing:
tensor([[-0.2049, -0.1840,  0.0058,  0.2033, -0.2228, -0.3114]],
       requires_grad=True)
Parameter containing:
tensor([-0.1716], requires_grad=True)


# Second way (recommanded)

In [21]:
model2 = Model(n_input_features=6)
FILE = "Model2.pth" #pth is short for pytorch, it's common
torch.save(model2, FILE)


torch.save(model2.state_dict(), FILE)

In [23]:
loaded_model = Model(n_input_features=6)
loaded_model.load_state_dict(torch.load(FILE))

loaded_model.eval()
for param in model.parameters():
    print(param)

Parameter containing:
tensor([[-0.2049, -0.1840,  0.0058,  0.2033, -0.2228, -0.3114]],
       requires_grad=True)
Parameter containing:
tensor([-0.1716], requires_grad=True)


___________________
### but how the statedict looks like:

In [24]:
print(model.state_dict())

OrderedDict([('linear.weight', tensor([[-0.2049, -0.1840,  0.0058,  0.2033, -0.2228, -0.3114]])), ('linear.bias', tensor([-0.1716]))])


___________________
### Saving a Checkpoint during Training

E.g. we want to stop at some point in the training process and save a checkpoint.

In [26]:
learning_rate = 0.01
optimizer = torch.optim.SGD(model.parameters(), lr = learning_rate)

print(optimizer.state_dict)

<bound method Optimizer.state_dict of SGD (
Parameter Group 0
    dampening: 0
    lr: 0.01
    maximize: False
    momentum: 0
    nesterov: False
    weight_decay: 0
)>


In [28]:
checkpoint = {
    "epoch": 90,
    "model_state": model.state_dict(),
    "optim_state": optimizer.state_dict()
}

torch.save(checkpoint, "checkpoint.pth")

In [29]:
#load the checkpoint
loaded_checkpoint = torch.load("checkpoint.pth")

#setup different models

epoch = loaded_checkpoint["epoch"]

#remember that you must create the model again
model = Model(n_input_features=6)
optimizer = torch.optim.SGD(model.parameters(), lr = 0) #different lr even

model.load_state_dict(checkpoint["model_state"])
optimizer.load_state_dict(checkpoint["optim_state"])

print(optimizer.state_dict())

{'state': {}, 'param_groups': [{'lr': 0.01, 'momentum': 0, 'dampening': 0, 'weight_decay': 0, 'nesterov': False, 'maximize': False, 'params': [0, 1]}]}


In [25]:
# Remember that you must call model.eval() to set dropout and batch normalization layers 
# to evaluation mode before running inference. Failing to do this will yield 
# inconsistent inference results. If you wish to resuming training, 
# call model.train() to ensure these layers are in training mode.

""" SAVING ON GPU/CPU 
# 1) Save on GPU, Load on CPU
device = torch.device("cuda")
model.to(device)
torch.save(model.state_dict(), PATH)
device = torch.device('cpu')
model = Model(*args, **kwargs)
model.load_state_dict(torch.load(PATH, map_location=device))



# 2) Save on GPU, Load on GPU
device = torch.device("cuda")
model.to(device)
torch.save(model.state_dict(), PATH)
model = Model(*args, **kwargs)
model.load_state_dict(torch.load(PATH))
model.to(device)
# Note: Be sure to use the .to(torch.device('cuda')) function 
# on all model inputs, too!


# 3) Save on CPU, Load on GPU
torch.save(model.state_dict(), PATH)
device = torch.device("cuda")
model = Model(*args, **kwargs)
model.load_state_dict(torch.load(PATH, map_location="cuda:0"))  # Choose whatever GPU device number you want
model.to(device)

# This loads the model to a given GPU device. 
# Next, be sure to call model.to(torch.device('cuda')) to convert the model’s parameter tensors to CUDA tensors
"""

' SAVING ON GPU/CPU \n# 1) Save on GPU, Load on CPU\ndevice = torch.device("cuda")\nmodel.to(device)\ntorch.save(model.state_dict(), PATH)\ndevice = torch.device(\'cpu\')\nmodel = Model(*args, **kwargs)\nmodel.load_state_dict(torch.load(PATH, map_location=device))\n# 2) Save on GPU, Load on GPU\ndevice = torch.device("cuda")\nmodel.to(device)\ntorch.save(model.state_dict(), PATH)\nmodel = Model(*args, **kwargs)\nmodel.load_state_dict(torch.load(PATH))\nmodel.to(device)\n# Note: Be sure to use the .to(torch.device(\'cuda\')) function \n# on all model inputs, too!\n# 3) Save on CPU, Load on GPU\ntorch.save(model.state_dict(), PATH)\ndevice = torch.device("cuda")\nmodel = Model(*args, **kwargs)\nmodel.load_state_dict(torch.load(PATH, map_location="cuda:0"))  # Choose whatever GPU device number you want\nmodel.to(device)\n# This loads the model to a given GPU device. \n# Next, be sure to call model.to(torch.device(\'cuda\')) to convert the model’s parameter tensors to CUDA tensors\n'