In [2]:
import torch
import torch.nn as nn

# Saving the Whole model

In [7]:
class Model(nn.Module):
    """Minimal binary classifier: one linear layer followed by a sigmoid.

    The layer is stored as ``self.linear``, so the state-dict keys are
    ``linear.weight`` and ``linear.bias``.
    """

    def __init__(self, n_input_features):
        """Build the linear layer mapping ``n_input_features`` inputs to 1 output."""
        super().__init__()
        self.linear = nn.Linear(n_input_features, 1)

    def forward(self, x):
        """Return ``sigmoid(self.linear(x))`` for the input tensor ``x``."""
        logits = self.linear(x)
        return torch.sigmoid(logits)

model = Model(n_input_features=6)
# train your model...


def _print_parameters(title, module):
    """Print ``title`` and then each parameter tensor of ``module``, one per line."""
    print(title)
    for tensor in module.parameters():
        print(tensor)


#################### save all ####################
_print_parameters(' model parameters', model)
print('-' * 50)

# Save and load the entire model object (not just its tensors).
FILE = "model.pth"
torch.save(model, FILE)

# The file holds a pickled Model instance rather than a plain state dict,
# hence weights_only=False. Only load such files from a trusted source.
loaded_model = torch.load(FILE, weights_only=False)
loaded_model.eval()

_print_parameters('Loaded model parameters', loaded_model)


 model parameters
Parameter containing:
tensor([[-0.3281,  0.3887,  0.0708,  0.0638,  0.3653, -0.3094]],
       requires_grad=True)
Parameter containing:
tensor([-0.1175], requires_grad=True)
--------------------------------------------------
Loaded model parameters
Parameter containing:
tensor([[-0.3281,  0.3887,  0.0708,  0.0638,  0.3653, -0.3094]],
       requires_grad=True)
Parameter containing:
tensor([-0.1175], requires_grad=True)


# Saving only the weights and biases (state dict) and resuming model training

In [8]:
############ save only the state dict ############

# Show the parameter tensors and write just them (the state dict) to disk.
FILE = "model.pth"
print(model.state_dict())
torch.save(model.state_dict(), FILE)
print('-' * 50)

# Build a fresh Model (switched to eval mode), then copy the stored tensors
# into it. load_state_dict takes the loaded dictionary, not the file path.
loaded_model = Model(n_input_features=6).eval()
loaded_model.load_state_dict(torch.load(FILE))

print(loaded_model.state_dict())
print('-' * 50)



########### save and load a checkpoint ###########
learning_rate = 0.01
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

# Show the optimizer settings that are about to be stored.
print(optimizer.state_dict())
print('-' * 50)

# Bundle everything needed to resume: epoch counter, model weights and
# optimizer state.
checkpoint = {
    "epoch": 90,
    "model_state": model.state_dict(),
    "optim_state": optimizer.state_dict(),
}
FILE = "checkpoint.pth"
torch.save(checkpoint, FILE)

# Start from a brand-new model and optimizer. lr=0 is only a placeholder:
# loading the optimizer state below restores the saved learning rate.
model = Model(n_input_features=6)
optimizer = torch.optim.SGD(model.parameters(), lr=0)

checkpoint = torch.load(FILE)
epoch = checkpoint['epoch']
model.load_state_dict(checkpoint['model_state'])
optimizer.load_state_dict(checkpoint['optim_state'])

model.eval()
# - or -
# model.train()

print(optimizer.state_dict())

# Remember that you must call model.eval() to set dropout and batch normalization layers
# to evaluation mode before running inference. Failing to do this will yield
# inconsistent inference results. If you wish to resume training,
# call model.train() to ensure these layers are in training mode.

OrderedDict([('linear.weight', tensor([[-0.3281,  0.3887,  0.0708,  0.0638,  0.3653, -0.3094]])), ('linear.bias', tensor([-0.1175]))])
--------------------------------------------------
OrderedDict([('linear.weight', tensor([[-0.3281,  0.3887,  0.0708,  0.0638,  0.3653, -0.3094]])), ('linear.bias', tensor([-0.1175]))])
--------------------------------------------------
{'state': {}, 'param_groups': [{'lr': 0.01, 'momentum': 0, 'dampening': 0, 'weight_decay': 0, 'nesterov': False, 'maximize': False, 'foreach': None, 'differentiable': False, 'fused': None, 'params': [0, 1]}]}
--------------------------------------------------
{'state': {}, 'param_groups': [{'lr': 0.01, 'momentum': 0, 'dampening': 0, 'weight_decay': 0, 'nesterov': False, 'maximize': False, 'foreach': None, 'differentiable': False, 'fused': None, 'params': [0, 1]}]}


# SAVING ON GPU/CPU 

In [13]:
# 1) Save on GPU, Load on CPU
PATH = 'model.pth'

# Move the model onto the GPU and write its state dict to disk.
device = torch.device("cuda")
torch.save(model.to(device).state_dict(), PATH)

# Rebuild the model on the CPU; map_location tells torch.load to place the
# stored tensors on the CPU device.
device = torch.device('cpu')
model = Model(n_input_features=6)
model.load_state_dict(torch.load(PATH, map_location=device))
model.eval()


Model(
  (linear): Linear(in_features=6, out_features=1, bias=True)
)

In [17]:
# 2) Save on GPU, Load on GPU
PATH = 'model.pth'
device = torch.device("cuda")

# Save: move the model onto the GPU and write its state dict to disk.
torch.save(model.to(device).state_dict(), PATH)

# Load: fill a fresh model from the file, then move it onto the GPU and
# switch it to evaluation mode.
model = Model(n_input_features=6)
model.load_state_dict(torch.load(PATH))
model.to(device).eval()


Model(
  (linear): Linear(in_features=6, out_features=1, bias=True)
)

In [19]:
# 3) Save on CPU, Load on GPU
PATH = 'model.pth'

# The model may still live on the GPU from an earlier step. Move it to the CPU
# first so that the saved file really contains CPU tensors, as this example
# is meant to demonstrate.
model.to(torch.device("cpu"))
torch.save(model.state_dict(), PATH)

device = torch.device("cuda")
model = Model(n_input_features=6)
# map_location loads the stored tensors onto the given GPU device.
model.load_state_dict(torch.load(PATH, map_location="cuda:0"))  # Choose whatever GPU device number you want
# The freshly built model's own parameters are still CPU tensors, so be sure
# to call model.to(torch.device('cuda')) to convert them to CUDA tensors.
model.to(device)
# Switch to evaluation mode before inference, as in the other loading examples.
model.eval()



Model(
  (linear): Linear(in_features=6, out_features=1, bias=True)
)